Preliminaries

session info:

# print the R version, platform, locale, and attached/loaded packages
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.29     R6_2.5.1          jsonlite_1.8.0    magrittr_2.0.3   
##  [5] evaluate_0.15     stringi_1.7.8     rlang_1.0.6       cli_3.4.1        
##  [9] data.table_1.14.2 rstudioapi_0.13   jquerylib_0.1.4   bslib_0.3.1      
## [13] rmarkdown_2.14    tools_4.2.0       stringr_1.4.1     xfun_0.31        
## [17] yaml_2.3.5        fastmap_1.1.0     compiler_4.2.0    htmltools_0.5.2  
## [21] knitr_1.39        sass_0.4.1

Install and load packages

# install CRAN packages (if not yet installed)
# The check compares against the installed package *names* only, i.e. the row
# names of installed.packages(); testing membership in the whole matrix would
# also match other cells (e.g. a Depends or LinkingTo field that happens to
# consist of a single package name).
# installed.packages() is re-queried for every package because an earlier
# install with dependencies = TRUE may already have pulled in a later one.
sapply(
  c("data.table", "tidyverse", "devtools", "readxl", "kableExtra", "ngram",
    "networkD3", "igraph", "network", "patchwork", "koRpus", "pbapply",
    "tidytext", "cluster", "ggrepel", "animation", "vroom", "ggrepel",
    "Rtsne"),
  function(x) {
    if (!x %in% rownames(installed.packages())) {
      install.packages(x, dependencies = TRUE,
                       repos = "http://cran.us.r-project.org")
    }
  }
)
## $data.table
## NULL
## 
## $tidyverse
## NULL
## 
## $devtools
## NULL
## 
## $readxl
## NULL
## 
## $kableExtra
## NULL
## 
## $ngram
## NULL
## 
## $networkD3
## NULL
## 
## $igraph
## NULL
## 
## $network
## NULL
## 
## $patchwork
## NULL
## 
## $koRpus
## NULL
## 
## $pbapply
## NULL
## 
## $tidytext
## NULL
## 
## $cluster
## NULL
## 
## $ggrepel
## NULL
## 
## $animation
## NULL
## 
## $vroom
## NULL
## 
## $ggrepel
## NULL
## 
## $Rtsne
## NULL
# install non-CRAN packages (if not yet installed)
# Each check compares against the installed package names (row names of
# installed.packages()) rather than against every cell of that matrix.

# install "concordances" if not yet installed
# The package is pinned to commit f4ca785. Only one guarded install is kept:
# an additional unpinned install of the same package placed before this one
# would always satisfy the check and make the pinned install unreachable.
# The ref is written on a single line so that no newline ends up in the string.
if (!"concordances" %in% rownames(installed.packages())) {
  devtools::install_github("hartmast/concordances", ref = "f4ca785")
}

# install "collostructions" if not yet installed
# if this doesn't work, check sfla.ch for the package
if (!"collostructions" %in% rownames(installed.packages())) {
  install.packages("https://sfla.ch/wp-content/uploads/2021/02/collostructions_0.2.0.tar.gz", repos = NULL)
}

# install "wordVectors" if not yet installed
if (!"wordVectors" %in% rownames(installed.packages())) {
  devtools::install_github("bmschmidt/wordVectors")
}

# load packages
library(concordances)
library(tidyverse)
library(readxl)
library(data.table)
library(kableExtra)
library(collostructions)
library(wordVectors)
library(vroom)
library(ggrepel)
library(cluster)
library(patchwork)

ENCOW

Data retrieval

The data were retrieved from the NoSketchEngine instance of the COW corpora (https://www.webcorpora.org/) on Sept 27, 2019, using the ENCOW16B “World Englishes” corpus. The query was:

[word="[Tt]he"] "mother" "of" "all" []

The data were exported to the XML file mother_of_all.xml, which is imported in the subsequent step.

# import the NoSketchEngine XML export of the query as concordance `d`
d <- getNSE(
  "../data/mother_of_all.xml",
  xml = TRUE,
  context_tags = FALSE,
  verbose = FALSE
)

Annotation

The concordance is exported for annotation; the annotated file is then read in.

# export for manual annotation (kept for reference, not re-run):
# write_excel_csv(d, "mother_of_all_ENCOW.csv")
# read the annotated spreadsheet; this replaces the raw concordance in `d`
d <- read_xlsx(path = "../data/mother_of_all_ENCOW.xlsx")

Data wrangling

We only keep the instances manually tagged as keep == “y”, excluding false hits and doubtful cases.

# retain only rows whose `keep` annotation is "y"
d <- d %>%
  filter(keep == "y")

Types, tokens, hapax legomena

# frequency table: one row per lemma, most frequent lemmas first
d_tbl <- d %>%
  select(lemma) %>%
  table() %>%
  as_tibble() %>%
  rename(Freq = n) %>%
  arrange(desc(Freq))
# overview table
tibble(
  Types = nrow(d_tbl),                     # number of distinct lemmas
  Tokens = sum(d_tbl$Freq),                # total number of attestations
  `Hapax Legomena` = sum(d_tbl$Freq == 1)  # lemmas attested exactly once
) %>%
  kbl() %>%
  kable_material(c("striped", "hover"))
Types Tokens Hapax Legomena
1669 4127 1092

Collostructional analysis

For performing a collostructional analysis, we have to know how often a lemma attested in the open slot of the mother of all construction occurs in the ENCOW corpus as a whole. For this purpose, we read in the ENCOW word list (available at webcorpora.org).

# not in repository,
# available at webcorpora.org after registration
# frequencies for relevant hits available at ../data/mother_of_all_with_encow_frequencies.csv
# The file is read without a header row, so the columns get fread()'s default
# names (V1, V2, ...); they are renamed in a later step.
encow <- fread(
  "/Volumes/My Passport/ENCOW word lists/encow16ax.lp.tsv",
  header = FALSE
)
# 
# head(encow)
# 
# # only nouns
# encow <- encow[V2 %in% c("NN", "NE")]
# 
# head(encow)

This is a huge database, but we only need nouns (entries tagged NN or NE), so we drop the rest in order to speed up the next calculations. Also, we add more self-explanatory column names:

# keep only the rows whose tag (second column) is NN or NE
encow <- encow[V2 %in% c("NN", "NE")]
# give the three columns self-explanatory names
setnames(encow, c("Lemma", "POS", "Freq"))
# collapse to one row per lemma: a lemma attested both as NN and as NE
# gets the sum of its frequencies
encow <- encow[, .(Freq = sum(Freq)), by = Lemma]

We compile a frequency table from our mother of all concordance d and combine it with the corpus frequencies in the encow table.

# get frequencies: count each lemma in the concordance, most frequent first
d_tbl <- d %>%
  select(lemma) %>%
  table() %>%
  sort(decreasing = TRUE) %>%
  as.data.frame(stringsAsFactors = FALSE)
colnames(d_tbl) <- c("Lemma", "Freq_in_cxn")
# join tables: every lemma from the concordance is kept; lemmas without a
# match in `encow` get NA as their corpus frequency
d_tbl <- left_join(d_tbl, encow, by = "Lemma")

These data are not without noise - ENCOW’s lemmatization is of course not perfect, but we can still expect that the relationships between construction frequencies and corpus frequencies of the words in question are roughly representative of their “actual” relationship in everyday use, at least in the text types represented in the corpus. However, given that the lemmatization is not perfect, there can be cases where the corpus frequency is lower than the construction frequency. This should of course not be possible - if a word occurs, say, three times in a specific construction in a corpus, its total frequency in the corpus cannot be lower than three! In one instance, however, this is the case in our data. The reason for this is that our concordance was lemmatized manually while for ENCOW, we rely on the automatic lemmatization. As this only affects one single attestation, we just exclude it.

# keep only rows whose construction frequency (column 2) does not exceed the
# corpus frequency (column 3)
# NOTE(review): rows whose corpus frequency is NA (no match in the join) fail
# this comparison as well and are therefore dropped too - confirm this is
# intended
d_tbl <- d_tbl[which(d_tbl[[2]] <= d_tbl[[3]]), , drop = FALSE]

Next, we perform a collexeme analysis using Flach’s collostructions package.

# corpus size passed to collex(); noted by the author as sum(encow$Freq)
# run the collexeme analysis and render the result as a scrollable table
d_tbl %>%
  as.data.frame() %>%
  collex(corpsize = 1805183579) %>%
  kbl() %>%
  kable_material(c("striped", "hover")) %>%
  scroll_box(width = "800px", height = "200px")
COLLEX CORP.FREQ OBS EXP ASSOC COLL.STR.LOGL SIGNIF
battle 577746 148 1.3 attr 1113.36172 *****
hangover 18482 62 0.0 attr 783.25739 *****
bubble 142649 50 0.3 attr 406.05799 *****
crisis 556466 60 1.3 attr 347.64395 *****
bailout 46997 28 0.1 attr 256.80882 *****
headache 95519 29 0.2 attr 227.08792 *****
adventure 274370 34 0.6 attr 206.06747 *****
cock-up 2875 15 0.0 attr 202.59408 *****
storm 246347 28 0.6 attr 164.91399 *****
bomb 239152 27 0.5 attr 158.66175 *****
demo 111771 22 0.3 attr 153.34106 *****
scandal 114916 20 0.3 attr 134.52810 *****
evil 186792 22 0.4 attr 131.07939 *****
crash 210888 21 0.5 attr 118.21208 *****
virtue 196117 20 0.4 attr 113.51010 *****
meltdown 24367 12 0.1 attr 105.46096 *****
recession 184076 18 0.4 attr 100.67254 *****
tantrum 14725 10 0.0 attr 94.28328 *****
fuck-up 389 6 0.0 attr 94.08983 *****
festival 298468 19 0.7 attr 90.42479 *****
sin 360715 20 0.8 attr 89.87169 *****
monad 10743 9 0.0 attr 88.62167 *****
boom 100080 14 0.2 attr 88.12070 *****
smokescreen 5722 8 0.0 attr 86.95459 *****
comeback 37283 11 0.1 attr 85.46378 *****
jam 69991 12 0.2 attr 80.33864 *****
church 1322436 29 3.0 attr 80.14827 *****
war 1733735 32 3.9 attr 78.65460 *****
scam 76079 12 0.2 attr 78.36408 *****
party 2127917 33 4.8 attr 71.14867 *****
oxymoron 7227 7 0.0 attr 70.95200 *****
vigil 16527 8 0.0 attr 70.02500 *****
shitstorm 783 5 0.0 attr 69.54475 *****
irony 79503 11 0.2 attr 68.99199 *****
parade 83951 11 0.2 attr 67.81424 *****
trip 849031 22 1.9 attr 67.42347 *****
non-sequitur 1122 5 0.0 attr 65.93924 *****
cliffhanger 5629 6 0.0 attr 61.96147 *****
democracy 349725 15 0.8 attr 60.03961 *****
mess-up 411 4 0.0 attr 59.01848 *****
theory 1051063 22 2.4 attr 58.93627 *****
run 635445 18 1.4 attr 58.09191 *****
thunderstorm 19060 7 0.0 attr 57.42423 *****
stitch-up 638 4 0.0 attr 55.48761 *****
bollocking 727 4 0.0 attr 54.44022 *****
squeeze 26124 7 0.1 attr 53.04161 *****
extinction 57634 8 0.1 attr 50.22109 *****
pandemic 16943 6 0.0 attr 48.78499 *****
cold 108202 9 0.2 attr 47.47732 *****
lie 227656 11 0.5 attr 46.51142 *****
whore 21215 6 0.0 attr 46.10552 *****
migraine 22445 6 0.1 attr 45.43465 *****
lawsuit 126192 9 0.3 attr 44.78956 *****
engine 890733 17 2.0 attr 42.73851 *****
conspiracy 142758 9 0.3 attr 42.64368 *****
mashup 3335 4 0.0 attr 42.24829 *****
letdown 3884 4 0.0 attr 41.03094 *****
KPI 13 2 0.0 attr 40.85466 *****
facepalm 577 3 0.0 attr 40.48907 *****
strike 315254 11 0.7 attr 39.74274 *****
toothache 4608 4 0.0 attr 39.66612 *****
mandate 114832 8 0.3 attr 39.44786 *****
rally 115688 8 0.3 attr 39.33287 *****
blog 873617 16 2.0 attr 39.06059 *****
firewall 43252 6 0.1 attr 37.65572 *****
crunch 43915 6 0.1 attr 37.47614 *****
punch-up 1126 3 0.0 attr 36.47241 *****
blowout 7209 4 0.0 attr 36.09628 *****
deal 1120817 17 2.5 attr 35.95853 *****
muck-up 55 2 0.0 attr 34.83412 *****
compresser 74 2 0.0 attr 33.62820 *****
fest 9988 4 0.0 attr 33.49973 *****
backlash 29811 5 0.1 attr 33.24914 *****
conflict 559033 12 1.3 attr 32.66448 *****
mess 179855 8 0.4 attr 32.56099 *****
blizzard 11996 4 0.0 attr 32.04298 *****
reorganisation 12123 4 0.0 attr 31.95929 *****
distraction 71026 6 0.2 attr 31.82824 *****
hazard 122614 7 0.3 attr 31.82724 *****
ballgame 2596 3 0.0 attr 31.46269 *****
election 857044 14 1.9 attr 31.38589 *****
boondoggle 2634 3 0.0 attr 31.37562 *****
burka 2638 3 0.0 attr 31.36653 *****
pothole 15950 4 0.0 attr 29.78135 *****
prank 16274 4 0.0 attr 29.62191 *****
raingear 211 2 0.0 attr 29.40212 *****
abomination 16830 4 0.0 attr 29.35562 *****
disaster 314162 9 0.7 attr 29.21588 *****
fundamentalism 17323 4 0.0 attr 29.12684 *****
show 1613739 18 3.6 attr 28.92828 *****
cover-up 17992 4 0.0 attr 28.82668 *****
understatement 18994 4 0.0 attr 28.39757 *****
piss-up 275 2 0.0 attr 28.33832 *****
row 334603 9 0.8 attr 28.17307 *****
injunction 51057 5 0.1 attr 27.96368 *****
rant 51084 5 0.1 attr 27.95852 *****
derby 20239 4 0.0 attr 27.89521 *****
demos 4734 3 0.0 attr 27.86595 *****
mother 1159790 15 2.6 attr 27.70920 *****
problem 4765704 32 10.7 attr 27.50818 *****
helter-skelter 356 2 0.0 attr 27.30272 *****
hoo-hah 371 2 0.0 attr 27.13725 *****
corruption 178092 7 0.4 attr 26.85088 *****
struggle 371955 9 0.8 attr 26.43598 *****
cuneiform-malapropisms 1 1 0.0 attr 26.00826 *****
Frankenstorage 1 1 0.0 attr 26.00826 *****
install 1 1 0.0 attr 26.00826 *****
locker-upper 1 1 0.0 attr 26.00826 *****
p-tool 1 1 0.0 attr 26.00826 *****
pluck-up 1 1 0.0 attr 26.00826 *****
RPV 1 1 0.0 attr 26.00826 *****
supervixen 1 1 0.0 attr 26.00826 *****
volkswagen 1 1 0.0 attr 26.00826 *****
which 1 1 0.0 attr 26.00826 *****
distortion 66702 5 0.2 attr 25.36100 *****
spider 70777 5 0.2 attr 24.78632 *****
invention 131000 6 0.3 attr 24.75184 *****
isopod 676 2 0.0 attr 24.73378 *****
whoredom 756 2 0.0 attr 24.28612 *****
scare 33542 4 0.1 attr 23.91324 *****
fight 438673 9 1.0 attr 23.76608 *****
dust-up 869 2 0.0 attr 23.72873 *****
dealkiller 2 1 0.0 attr 23.23567 *****
no-partner 2 1 0.0 attr 23.23567 *****
shipwreck 10693 3 0.0 attr 23.00279 *****
trap 89373 5 0.2 attr 22.53703 *****
bashing 11621 3 0.0 attr 22.50755 *****
stunt 40438 4 0.1 attr 22.44841 *****
site 3629658 25 8.2 attr 22.32446 *****
tory 3 1 0.0 attr 22.18918 *****
XenaFest 3 1 0.0 attr 22.18918 *****
science 1283303 14 2.9 attr 21.99592 *****
tsunami 43533 4 0.1 attr 21.87231 *****
monster 170147 6 0.4 attr 21.79021 *****
APC 1413 2 0.0 attr 21.78489 *****
rb 4 1 0.0 attr 21.50959 *****
hairball 1556 2 0.0 attr 21.39966 *****
trade 814855 11 1.8 attr 21.09473 *****
hormone 104901 5 0.2 attr 21.00484 *****
ctag 5 1 0.0 attr 21.00425 *****
paradox 50103 4 0.1 attr 20.77732 *****
sulk 1831 2 0.0 attr 20.74954 *****
fauxpas 6 1 0.0 attr 20.60154 *****
deception 51676 4 0.1 attr 20.53709 *****
market 2428217 19 5.5 attr 20.31802 *****
sandstorm 2047 2 0.0 attr 20.30423 *****
backburn 7 1 0.0 attr 20.26666 *****
brainfade 7 1 0.0 attr 20.26666 *****
mom-mobile 7 1 0.0 attr 20.26666 *****
pseudo-classification 7 1 0.0 attr 20.26666 *****
taxodium 7 1 0.0 attr 20.26666 *****
climb 55464 4 0.1 attr 19.98818 *****
nightmare 120716 5 0.3 attr 19.67169 *****
bonfire 18864 3 0.0 attr 19.63319 *****
crawl 19152 3 0.0 attr 19.54357 *****
scheme 890394 11 2.0 attr 19.48369 ****
cankle 11 1 0.0 attr 19.30631 ****
soduko 11 1 0.0 attr 19.30631 ****
garrison 20134 3 0.0 attr 19.24795 ****
binge 20139 3 0.0 attr 19.24648 ****
bump 61436 4 0.1 attr 19.19692 ****
bruise 21131 3 0.0 attr 18.96243 ****
play-down 13 1 0.0 attr 18.95739 ****
backlog 21197 3 0.0 attr 18.94401 ****
mall 64427 4 0.1 attr 18.83007 ****
gamble 21737 3 0.0 attr 18.79549 ****
biteback 15 1 0.0 attr 18.66042 ****
growth-spurt 15 1 0.0 attr 18.66042 ****
letterhack 15 1 0.0 attr 18.66042 ****
herring 22307 3 0.1 attr 18.64274 ****
cop-out 3170 2 0.0 attr 18.55917 ****
droner 16 1 0.0 attr 18.52699 ****
coverup 3243 2 0.0 attr 18.46840 ****
hike 70084 4 0.2 attr 18.18220 ****
Spruance 19 1 0.0 attr 18.17304 ****
dodo 3630 2 0.0 attr 18.01907 ****
depression 240027 6 0.5 attr 17.97530 ****
upset 25516 3 0.1 attr 17.85070 ****
FMA 23 1 0.0 attr 17.78149 ****
coincidence 74615 4 0.2 attr 17.70139 ****
psych-out 24 1 0.0 attr 17.69451 ****
SUV 26 1 0.0 attr 17.53114 ****
gad-fly 27 1 0.0 attr 17.45420 ****
towback 28 1 0.0 attr 17.38012 ****
hoax 27877 3 0.1 attr 17.33031 ****
bitemark 29 1 0.0 attr 17.30868 ****
shitfit 29 1 0.0 attr 17.30868 ****
NGO 31 1 0.0 attr 17.17303 ****
GIL 33 1 0.0 attr 17.04600 ****
stinkbomb 33 1 0.0 attr 17.04600 ****
victory 386555 7 0.9 attr 16.93825 ****
collapse 161699 5 0.4 attr 16.93300 ****
self-martyrdom 35 1 0.0 attr 16.92656 ****
miscalculation 4817 2 0.0 attr 16.89247 ****
hairdyer 36 1 0.0 attr 16.86941 ****
secret 266952 6 0.6 attr 16.82055 ****
pseudo-scandal 37 1 0.0 attr 16.81386 ****
zone-out 38 1 0.0 attr 16.75980 ****
network 1229692 12 2.8 attr 16.75717 ****
gubbing 40 1 0.0 attr 16.65588 ****
sinkhole 5145 2 0.0 attr 16.63039 ****
flip-flop 5515 2 0.0 attr 16.35422 ****
bajada 48 1 0.0 attr 16.28705 ****
super-heroine 49 1 0.0 attr 16.24538 ****
swedge 49 1 0.0 attr 16.24538 ****
noria 50 1 0.0 attr 16.20457 ****
tailchase 52 1 0.0 attr 16.12535 ****
cameltoe 55 1 0.0 attr 16.01213 ****
stuff-up 55 1 0.0 attr 16.01213 ****
awesomeness 6126 2 0.0 attr 15.93662 ****
riot 94443 4 0.2 attr 15.90530 ****
protest 293599 6 0.7 attr 15.79860 ****
upper-cut 62 1 0.0 attr 15.77048 ****
texaco 64 1 0.0 attr 15.70648 ****
queue 97310 4 0.2 attr 15.67895 ****
movie 1106208 11 2.5 attr 15.67865 ****
hyperinflation 6541 2 0.0 attr 15.67625 ****
fiesta 6614 2 0.0 attr 15.63218 ****
lip-lock 67 1 0.0 attr 15.61417 ****
betrayal 37597 3 0.1 attr 15.57923 ****
NDA 73 1 0.0 attr 15.44142 ****
hooley 74 1 0.0 attr 15.41403 ****
spoiler 39069 3 0.1 attr 15.35541 ****
turd 7187 2 0.0 attr 15.30237 ****
meglomaniac 80 1 0.0 attr 15.25711 ****
loofa 82 1 0.0 attr 15.20743 ****
hug 40316 3 0.1 attr 15.17250 ****
shocker 7527 2 0.0 attr 15.11898 ***
reform 449287 7 1.0 attr 15.11484 ***
short-covering 87 1 0.0 attr 15.08837 ***
rug 41438 3 0.1 attr 15.01284 ***
sandwich 106354 4 0.2 attr 15.00865 ***
delusion 41480 3 0.1 attr 15.00696 ***
ROI 92 1 0.0 attr 14.97600 ***
wiki 7880 2 0.0 attr 14.93722 ***
northeaster 98 1 0.0 attr 14.84900 ***
catch-up 8177 2 0.0 attr 14.79055 ***
sell-out 8258 2 0.0 attr 14.75148 ***
battering 104 1 0.0 attr 14.72958 ***
tirade 8556 2 0.0 attr 14.61101 ***
rollercoaster 8671 2 0.0 attr 14.55811 ***
souvlaki 117 1 0.0 attr 14.49300 ***
amusement 45443 3 0.1 attr 14.47728 ***
pumpage 119 1 0.0 attr 14.45897 ***
plague 46161 3 0.1 attr 14.38645 ***
rip-off 9219 2 0.0 attr 14.31542 ***
cyle 133 1 0.0 attr 14.23569 ***
tank 488418 7 1.1 attr 14.12163 ***
rigging 9749 2 0.0 attr 14.09419 ***
boy-toy 146 1 0.0 attr 14.04856 ***
downpour 10019 2 0.0 attr 13.98612 ***
bitch 50605 3 0.1 attr 13.85494 ***
broadband 125718 4 0.3 attr 13.75759 ***
brain-teaser 169 1 0.0 attr 13.75514 ***
beatboxer 171 1 0.0 attr 13.73155 ***
kegger 172 1 0.0 attr 13.71986 ***
doodah 175 1 0.0 attr 13.68519 ***
fart 11427 2 0.0 attr 13.46642 ***
scission 196 1 0.0 attr 13.45802 ***
MMO 203 1 0.0 attr 13.38769 ***
city 2182009 15 4.9 attr 13.33879 ***
puzzle 133097 4 0.3 attr 13.33448 ***
phish 212 1 0.0 attr 13.30076 ***
failure 693430 8 1.6 attr 13.27716 ***
hair-cut 219 1 0.0 attr 13.23567 ***
gunbarrel 223 1 0.0 attr 13.19940 ***
booksigning 225 1 0.0 attr 13.18151 ***
stoppage 12387 2 0.0 attr 13.14804 ***
sauce 137879 4 0.3 attr 13.07360 ***
monsoon 12781 2 0.0 attr 13.02456 ***
double-team 244 1 0.0 attr 13.01912 ***
mindfuck 246 1 0.0 attr 13.00277 ***
comet 59324 3 0.1 attr 12.94036 ***
vice 13138 2 0.0 attr 12.91596 ***
brunch 13173 2 0.0 attr 12.90547 ***
river 542138 7 1.2 attr 12.90226 ***
rule-breaker 261 1 0.0 attr 12.88422 ***
fisking 266 1 0.0 attr 12.84622 ***
package 546778 7 1.2 attr 12.80381 ***
scramble 13628 2 0.0 attr 12.77168 ***
goods 289 1 0.0 attr 12.68016 ***
dinger 291 1 0.0 attr 12.66636 ***
flamewar 312 1 0.0 attr 12.52686 ***
malaprop 313 1 0.0 attr 12.52045 ***
breakup 14562 2 0.0 attr 12.51071 ***
gracilis 327 1 0.0 attr 12.43286 ***
paralympics 327 1 0.0 attr 12.43286 ***
AO 328 1 0.0 attr 12.42675 ***
pyramid 65547 3 0.1 attr 12.36983 ***
comb-over 346 1 0.0 attr 12.31982 ***
yoga 66786 3 0.2 attr 12.26304 ***
wave 414144 6 0.9 attr 12.21265 ***
showdown 15918 2 0.0 attr 12.16065 ***
gerrymander 384 1 0.0 attr 12.11130 ***
snowbank 386 1 0.0 attr 12.10091 ***
PWC 388 1 0.0 attr 12.09057 ***
carve-up 392 1 0.0 attr 12.07005 ***
tongue-lashing 396 1 0.0 attr 12.04973 ***
self-promoter 401 1 0.0 attr 12.02463 ***
artifacting 418 1 0.0 attr 11.94157 ***
gyratory 428 1 0.0 attr 11.89427 ***
tweaker 435 1 0.0 attr 11.86182 ***
chilli 17198 2 0.0 attr 11.85702 ***
resolution 430357 6 1.0 attr 11.82474 ***
foul-up 447 1 0.0 attr 11.80739 ***
muse 17512 2 0.0 attr 11.78606 ***
shoot-em-up 479 1 0.0 attr 11.66910 ***
boom-and-bust 484 1 0.0 attr 11.64833 ***
superstring 485 1 0.0 attr 11.64420 ***
blockbuster 18356 2 0.0 attr 11.60157 ***
insult 75143 3 0.2 attr 11.59323 ***
fair 75180 3 0.2 attr 11.59044 ***
blackout 18639 2 0.0 attr 11.54164 ***
tear-jerker 527 1 0.0 attr 11.47812 ***
overbite 567 1 0.0 attr 11.33185 ***
catfight 571 1 0.0 attr 11.31780 ***
firebreak 574 1 0.0 attr 11.30732 ***
non-competition 582 1 0.0 attr 11.27965 ***
money-spinner 608 1 0.0 attr 11.19228 ***
anticyclone 609 1 0.0 attr 11.18900 ***
quickfire 609 1 0.0 attr 11.18900 ***
deflation 20709 2 0.0 attr 11.12968 ***
beatdown 646 1 0.0 attr 11.07111 ***
lasciviousness 653 1 0.0 attr 11.04957 ***
waterbed 659 1 0.0 attr 11.03129 ***
dilemma 83174 3 0.2 attr 11.02010 ***
DVR 696 1 0.0 attr 10.92212 ***
sitcom 21990 2 0.0 attr 10.89536 ***
stomachache 750 1 0.0 attr 10.77282 **
jeremiad 760 1 0.0 attr 10.74635 **
smasher 760 1 0.0 attr 10.74635 **
plymouth 788 1 0.0 attr 10.67407 **
debacle 23345 2 0.1 attr 10.66227 **
con 88773 3 0.2 attr 10.65440 **
cut-and-paste 799 1 0.0 attr 10.64638 **
ufo 838 1 0.0 attr 10.55118 **
disproportionality 842 1 0.0 attr 10.54167 **
frame-up 845 1 0.0 attr 10.53457 **
tie-breaker 846 1 0.0 attr 10.53220 **
pepper 91318 3 0.2 attr 10.49626 **
mistake 492038 6 1.1 attr 10.49483 **
clusterfuck 864 1 0.0 attr 10.49015 **
elephant 91516 3 0.2 attr 10.48416 **
bot 24545 2 0.1 attr 10.46716 **
beanstalk 876 1 0.0 attr 10.46260 **
MMORPG 881 1 0.0 attr 10.45124 **
nerf 925 1 0.0 attr 10.35391 **
revolution 341641 5 0.8 attr 10.26200 **
acronym 25920 2 0.1 attr 10.25531 **
olympics 997 1 0.0 attr 10.20424 **
match 507459 6 1.1 attr 10.19386 **
shithole 1006 1 0.0 attr 10.18630 **
whistler 1026 1 0.0 attr 10.14700 **
jape 1041 1 0.0 attr 10.11802 **
nap 27309 2 0.1 attr 10.05275 **
supervillain 1085 1 0.0 attr 10.03539 **
look-up 1089 1 0.0 attr 10.02804 **
leak 99646 3 0.2 attr 10.01007 **
glitch 27724 2 0.1 attr 9.99428 **
torte 1111 1 0.0 attr 9.98812 **
imbroglio 1112 1 0.0 attr 9.98633 **
superstition 28435 2 0.1 attr 9.89619 **
disappointment 102895 3 0.2 attr 9.83217 **
ha-ha 1248 1 0.0 attr 9.75608 **
heresy 29822 2 0.1 attr 9.71192 **
anticlimax 1290 1 0.0 attr 9.69004 **
ending 105844 3 0.2 attr 9.67590 **
craving 30235 2 0.1 attr 9.65877 **
carbuncle 1323 1 0.0 attr 9.63965 **
blowjob 1331 1 0.0 attr 9.62762 **
PPP 1370 1 0.0 attr 9.57002 **
birther 1388 1 0.0 attr 9.54398 **
celebration 225438 4 0.5 attr 9.53410 **
woodburner 1428 1 0.0 attr 9.48732 **
whitey 1439 1 0.0 attr 9.47202 **
illusion 111356 3 0.3 attr 9.39610 **
epic 111410 3 0.3 attr 9.39343 **
denial 112260 3 0.3 attr 9.35165 **
cult 112641 3 0.3 attr 9.33304 **
goop 1544 1 0.0 attr 9.33159 **
chipper 1571 1 0.0 attr 9.29703 **
twist 113702 3 0.3 attr 9.28156 **
kickstarter 1621 1 0.0 attr 9.23457 **
afro 1667 1 0.0 attr 9.17879 **
setback 34282 2 0.1 attr 9.17448 **
nitwit 1677 1 0.0 attr 9.16687 **
FPS 1735 1 0.0 attr 9.09911 **
boner 1741 1 0.0 attr 9.09223 **
boil 35055 2 0.1 attr 9.08877 **
misfortune 35222 2 0.1 attr 9.07051 **
Biennale 1770 1 0.0 attr 9.05931 **
backlist 1802 1 0.0 attr 9.02361 **
curveball 1821 1 0.0 attr 9.00272 **
strop 1874 1 0.0 attr 8.94556 **
fascism 36486 2 0.1 attr 8.93516 **
pictogram 1901 1 0.0 attr 8.91707 **
fit 247538 4 0.6 attr 8.88537 **
shindig 1954 1 0.0 attr 8.86229 **
cropper 1959 1 0.0 attr 8.85720 **
mistranslation 1995 1 0.0 attr 8.82094 **
book 4216872 20 9.5 attr 8.81945 **
checklist 37797 2 0.1 attr 8.79985 **
biennial 2022 1 0.0 attr 8.79416 **
saga 37879 2 0.1 attr 8.79155 **
joke 251361 4 0.6 attr 8.77996 **
dual 2050 1 0.0 attr 8.76678 **
grab 38131 2 0.1 attr 8.76616 **
budget 788931 7 1.8 attr 8.75972 **
chily 2160 1 0.0 attr 8.66271 **
lister 2180 1 0.0 attr 8.64436 **
slugger 2241 1 0.0 attr 8.58943 **
ripper 2264 1 0.0 attr 8.56911 **
wrangle 2294 1 0.0 attr 8.54291 **
preservationist 2306 1 0.0 attr 8.53253 **
put-down 2317 1 0.0 attr 8.52306 **
hugger 2328 1 0.0 attr 8.51363 **
motte 2383 1 0.0 attr 8.46717 **
chinook 2397 1 0.0 attr 8.45551 **
waterfall 41422 2 0.1 attr 8.44983 **
dump 42083 2 0.1 attr 8.38948 **
tamale 2488 1 0.0 attr 8.38138 **
wreck 42177 2 0.1 attr 8.38097 **
meet 42284 2 0.1 attr 8.37132 **
shtick 2528 1 0.0 attr 8.34966 **
crab 42658 2 0.1 attr 8.33778 **
comeuppance 2566 1 0.0 attr 8.31999 **
tale 269156 4 0.6 attr 8.31280 **
hypocrisy 43518 2 0.1 attr 8.26181 **
hillfort 2676 1 0.0 attr 8.23652 **
militarist 2684 1 0.0 attr 8.23058 **
op 44127 2 0.1 attr 8.20896 **
flowchart 2757 1 0.0 attr 8.17723 **
wretchedness 2780 1 0.0 attr 8.16071 **
boycott 44892 2 0.1 attr 8.14365 **
whopper 2807 1 0.0 attr 8.14150 **
illegitimacy 2832 1 0.0 attr 8.12388 **
phone-in 2841 1 0.0 attr 8.11757 **
cave 140914 3 0.3 attr 8.11657 **
finale 45289 2 0.1 attr 8.11021 **
argument 1067699 8 2.4 attr 8.05404 **
chase 46297 2 0.1 attr 8.02670 **
fanfiction 3048 1 0.0 attr 7.97782 **
disputation 3083 1 0.0 attr 7.95514 **
re-write 3098 1 0.0 attr 7.94550 **
mountain 453534 5 1.0 attr 7.93219 **
grievance 47523 2 0.1 attr 7.92767 **
musical 47645 2 0.1 attr 7.91796 **
validator 3142 1 0.0 attr 7.91748 **
monthly 3238 1 0.0 attr 7.85771 **
grizzly 3245 1 0.0 attr 7.85343 **
phage 3259 1 0.0 attr 7.84488 **
downturn 48575 2 0.1 attr 7.84482 **
sander 3309 1 0.0 attr 7.81465 **
fightback 3335 1 0.0 attr 7.79911 **
mix-up 3369 1 0.0 attr 7.77897 **
tattoo 49431 2 0.1 attr 7.77879 **
lode 3388 1 0.0 attr 7.76781 **
blowback 3460 1 0.0 attr 7.72607 **
huff 3488 1 0.0 attr 7.71007 **
sell-off 3514 1 0.0 attr 7.69533 **
fraudster 3519 1 0.0 attr 7.69251 **
domestic 3635 1 0.0 attr 7.62816 **
defeat 154704 3 0.3 attr 7.61844 **
squall 3729 1 0.0 attr 7.57752 **
sale 1358384 9 3.1 attr 7.55459 **
final 52530 2 0.1 attr 7.54951 **
omission 52718 2 0.1 attr 7.53607 **
crazy 3817 1 0.0 attr 7.53126 **
spill 52804 2 0.1 attr 7.52993 **
adware 3832 1 0.0 attr 7.52348 **
error 675806 6 1.5 attr 7.51309 **
post-mortem 3870 1 0.0 attr 7.50391 **
FS 3917 1 0.0 attr 7.47998 **
jackal 3919 1 0.0 attr 7.47896 **
roadshow 3950 1 0.0 attr 7.46334 **
optimization 53982 2 0.1 attr 7.44698 **
heatwave 4077 1 0.0 attr 7.40062 **
betta 4102 1 0.0 attr 7.38850 **
granddad 4133 1 0.0 attr 7.37358 **
oilfield 4166 1 0.0 attr 7.35782 **
question 3614728 17 8.1 attr 7.34254 **
rainstorm 4266 1 0.0 attr 7.31083 **
memoir 56033 2 0.1 attr 7.30705 **
spreadsheet 56359 2 0.1 attr 7.28531 **
slapping 4323 1 0.0 attr 7.28453 **
game 3627561 17 8.2 attr 7.27970 **
potluck 4345 1 0.0 attr 7.27448 **
bill 913599 7 2.1 attr 7.26636 **
mod 56808 2 0.1 attr 7.25559 **
broadcaster 57121 2 0.1 attr 7.23502 **
thriller 57326 2 0.1 attr 7.22161 **
non-starter 4470 1 0.0 attr 7.21831 **
religion 699888 6 1.6 attr 7.20122 **
parasite 57685 2 0.1 attr 7.19825 **
decider 4535 1 0.0 attr 7.18973 **
dolly 4608 1 0.0 attr 7.15811 **
ribbon 58380 2 0.1 attr 7.15348 **
leaderboard 4626 1 0.0 attr 7.15040 **
lock-in 4633 1 0.0 attr 7.14740 **
moisturiser 4660 1 0.0 attr 7.13590 **
database 503564 5 1.1 attr 7.11082 **
harlot 4800 1 0.0 attr 7.07733 **
confrontation 59889 2 0.1 attr 7.05819 **
invective 4849 1 0.0 attr 7.05723 **
RPG 4867 1 0.0 attr 7.04990 **
illness 326144 4 0.7 attr 7.03279 **
reshuffle 4937 1 0.0 attr 7.02165 **
escarpment 4951 1 0.0 attr 7.01605 **
hood 60908 2 0.1 attr 6.99528 **
heatsink 5038 1 0.0 attr 6.98160 **
scrounger 5060 1 0.0 attr 6.97298 **
rectification 5068 1 0.0 attr 6.96986 **
bender 5070 1 0.0 attr 6.96908 **
system 5260283 22 11.8 attr 6.96189 **
basilica 5093 1 0.0 attr 6.96013 **
hurricane 61541 2 0.1 attr 6.95678 **
cookware 5238 1 0.0 attr 6.90463 **
slip 62689 2 0.1 attr 6.88801 **
sinner 62820 2 0.1 attr 6.88025 **
collectible 5321 1 0.0 attr 6.87356 **
recovery 334569 4 0.8 attr 6.86666 **
reinvention 5349 1 0.0 attr 6.86319 **
monetization 5357 1 0.0 attr 6.86023 **
omelette 5385 1 0.0 attr 6.84993 **
indiscretion 5416 1 0.0 attr 6.83859 **
hammering 5439 1 0.0 attr 6.83022 **
mitzvah 5444 1 0.0 attr 6.82840 **
whitewash 5489 1 0.0 attr 6.81214 **
trickster 5490 1 0.0 attr 6.81178 **
disease 958551 7 2.2 attr 6.79608 **
flood 181842 3 0.4 attr 6.77079 **
genocide 64857 2 0.1 attr 6.76177 **
cataclysm 5671 1 0.0 attr 6.74772 **
chiller 5714 1 0.0 attr 6.73280 **
enema 5714 1 0.0 attr 6.73280 **
mullah 5770 1 0.0 attr 6.71354 **
Islamism 5789 1 0.0 attr 6.70705 **
wildcard 5812 1 0.0 attr 6.69923 **
contest 184964 3 0.4 attr 6.68270 **
ride 345184 4 0.8 attr 6.66454 **
retriever 5917 1 0.0 attr 6.66389 **
conflagration 5923 1 0.0 attr 6.66189 **
sprayer 5926 1 0.0 attr 6.66089 **
torturer 5933 1 0.0 attr 6.65856 **
zone 346672 4 0.8 attr 6.63682 **
clean-up 6013 1 0.0 attr 6.63213
firestorm 6022 1 0.0 attr 6.62918
silencer 6051 1 0.0 attr 6.61970
cacophony 6104 1 0.0 attr 6.60249
race 980342 7 2.2 attr 6.57938
gazebo 6187 1 0.0 attr 6.57585
mallet 6252 1 0.0 attr 6.55524
fantasy 189877 3 0.4 attr 6.54752
garlic 68928 2 0.2 attr 6.53658
slasher 6314 1 0.0 attr 6.53578
opportunity 2026453 11 4.6 attr 6.49678
zine 6449 1 0.0 attr 6.49408
squabble 6500 1 0.0 attr 6.47855
subwoofer 6527 1 0.0 attr 6.47038
gaffe 6537 1 0.0 attr 6.46736
searchlight 6567 1 0.0 attr 6.45834
remap 6596 1 0.0 attr 6.44966
sit-in 6630 1 0.0 attr 6.43953
condiment 6724 1 0.0 attr 6.41179
sunburn 6759 1 0.0 attr 6.40157
rumor 71540 2 0.2 attr 6.39956
fucker 6782 1 0.0 attr 6.39487
refit 6812 1 0.0 attr 6.38618
neurosis 6824 1 0.0 attr 6.38271
aggregator 6840 1 0.0 attr 6.37810
keystone 6840 1 0.0 attr 6.37810
list 1772131 10 4.0 attr 6.36604
alibi 6886 1 0.0 attr 6.36490
gift 556558 5 1.3 attr 6.34860
firefly 6991 1 0.0 attr 6.33511
myth 198073 3 0.4 attr 6.33084
fanzine 7030 1 0.0 attr 6.32416
triangle 73031 2 0.2 attr 6.32376
alert 73155 2 0.2 attr 6.31753
burrito 7135 1 0.0 attr 6.29497
weekly 7142 1 0.0 attr 6.29305
cloud 366211 4 0.8 attr 6.28608
dessert 73858 2 0.2 attr 6.28244
altercation 7404 1 0.0 attr 6.22216
hyperlink 7472 1 0.0 attr 6.20418
redhead 7577 1 0.0 attr 6.17675
wipe 7578 1 0.0 attr 6.17649
gridlock 7612 1 0.0 attr 6.16769
curation 7662 1 0.0 attr 6.15482
nationalisation 7776 1 0.0 attr 6.12579
chile 7920 1 0.0 attr 6.08974
tarp 7963 1 0.0 attr 6.07910
trinket 8000 1 0.0 attr 6.07000
hornet 8051 1 0.0 attr 6.05751
concession 78583 2 0.2 attr 6.05565
telescope 78768 2 0.2 attr 6.04708
sleight 8193 1 0.0 attr 6.02318
wobble 8221 1 0.0 attr 6.01649
fraud 210809 3 0.5 attr 6.01424
excuse 211850 3 0.5 attr 5.98937
dictionary 80259 2 0.2 attr 5.97878
shootout 8423 1 0.0 attr 5.96884
defection 8490 1 0.0 attr 5.95330
workout 81114 2 0.2 attr 5.94024
debugger 8591 1 0.0 attr 5.93010
paddy 8604 1 0.0 attr 5.92713
January 8679 1 0.0 attr 5.91011
typhoon 8692 1 0.0 attr 5.90718
summation 8785 1 0.0 attr 5.88631
meatball 8814 1 0.0 attr 5.87985
drought 82589 2 0.2 attr 5.87480
whack 8871 1 0.0 attr 5.86721
bazaar 8910 1 0.0 attr 5.85861
triathlon 9041 1 0.0 attr 5.83001
refinancing 9065 1 0.0 attr 5.82482
inquiry 219635 3 0.5 attr 5.80787
tightening 9145 1 0.0 attr 5.80760
quango 9199 1 0.0 attr 5.79607
contagion 9215 1 0.0 attr 5.79267
cauldron 9297 1 0.0 attr 5.77532
ruse 9297 1 0.0 attr 5.77532
femme 9376 1 0.0 attr 5.75875
jolt 9394 1 0.0 attr 5.75499
crime 831913 6 1.9 attr 5.72136
rumble 9570 1 0.0 attr 5.71866
rationalization 9588 1 0.0 attr 5.71498
newsgroup 9593 1 0.0 attr 5.71396
flu 87146 2 0.2 attr 5.68047
sunspot 9799 1 0.0 attr 5.67239
crossword 9855 1 0.0 attr 5.66125
maniac 10110 1 0.0 attr 5.61130
stopper 10132 1 0.0 attr 5.60705
purveyor 10164 1 0.0 attr 5.60089
traditionalist 10196 1 0.0 attr 5.59475
ballpark 10282 1 0.0 attr 5.57833
predisposition 10377 1 0.0 attr 5.56037
sampler 10429 1 0.0 attr 5.55060
conference 849905 6 1.9 attr 5.54552
workhouse 10499 1 0.0 attr 5.53754
dispute 232753 3 0.5 attr 5.51883
shambles 10651 1 0.0 attr 5.50947
weapon 624499 5 1.4 attr 5.50244
refutation 10708 1 0.0 attr 5.49905
devaluation 10796 1 0.0 attr 5.48308
henchman 10810 1 0.0 attr 5.48055
shock 234892 3 0.5 attr 5.47357
prick 11006 1 0.0 attr 5.44549
extravaganza 11055 1 0.0 attr 5.43683
multinational 11062 1 0.0 attr 5.43560
hubris 11067 1 0.0 attr 5.43471
shenanigan 11113 1 0.0 attr 5.42663
in-law 11404 1 0.0 attr 5.37624
purge 11411 1 0.0 attr 5.37504
store 867928 6 2.0 attr 5.37478
shaker 11470 1 0.0 attr 5.36499
event 2817449 13 6.3 attr 5.35467
ceremony 240900 3 0.5 attr 5.34907
deluge 11647 1 0.0 attr 5.33516
outfit 96872 2 0.2 attr 5.30101
introspection 11859 1 0.0 attr 5.30003
casserole 11864 1 0.0 attr 5.29921
spanner 11899 1 0.0 attr 5.29348
album 643865 5 1.4 attr 5.28417
boost 98125 2 0.2 attr 5.25525
hunch 12138 1 0.0 attr 5.25478
enigma 12161 1 0.0 attr 5.25110
iron 247618 3 0.6 attr 5.21426
moustache 12503 1 0.0 attr 5.19717
slur 12717 1 0.0 attr 5.16419
hijacker 12725 1 0.0 attr 5.16296
tome 12766 1 0.0 attr 5.15672
app 100958 2 0.2 attr 5.15415
lexicon 12784 1 0.0 attr 5.15398
destination 250916 3 0.6 attr 5.14972
teaser 12848 1 0.0 attr 5.14428
take 101435 2 0.2 attr 5.13744
clamp 13026 1 0.0 attr 5.11756
catalogue 102366 2 0.2 attr 5.10508
homecoming 13165 1 0.0 attr 5.09696
catfish 13168 1 0.0 attr 5.09651
fear 661936 5 1.5 attr 5.08867
fool 103112 2 0.2 attr 5.07940
by-election 13317 1 0.0 attr 5.07468
tofu 13380 1 0.0 attr 5.06552
park 666069 5 1.5 attr 5.04502
sub 104198 2 0.2 attr 5.04237
fag 13572 1 0.0 attr 5.03789
sitter 13699 1 0.0 attr 5.01983
bastion 13822 1 0.0 attr 5.00251
index 260181 3 0.6 attr 4.97385
panic 106325 2 0.2 attr 4.97112
blocker 14113 1 0.0 attr 4.96215
handshake 14130 1 0.0 attr 4.95982
alien 107068 2 0.2 attr 4.94661
needle 108714 2 0.2 attr 4.89299
rake 14732 1 0.0 attr 4.87908
issue 3875541 16 8.7 attr 4.86778
basket 109531 2 0.2 attr 4.86672
conundrum 14861 1 0.0 attr 4.86222
but 14912 1 0.0 attr 4.85560
choke 14924 1 0.0 attr 4.85405
ambush 14940 1 0.0 attr 4.85198
reveal 15190 1 0.0 attr 4.81991
mismatch 15298 1 0.0 attr 4.80623
bash 15335 1 0.0 attr 4.80156
riddle 15354 1 0.0 attr 4.79917
encore 15477 1 0.0 attr 4.78376
concussion 15741 1 0.0 attr 4.75113
orb 15764 1 0.0 attr 4.74831
detour 15765 1 0.0 attr 4.74819
idolatry 15792 1 0.0 attr 4.74489
bombing 114064 2 0.3 attr 4.72491
piss 15987 1 0.0 attr 4.72122
conglomerate 15993 1 0.0 attr 4.72049
convention 275106 3 0.6 attr 4.70634
holiday 701366 5 1.6 attr 4.68744
compromise 115728 2 0.3 attr 4.67446
delight 115979 2 0.3 attr 4.66693
singularity 16478 1 0.0 attr 4.66293
fixation 16597 1 0.0 attr 4.64907
inversion 16733 1 0.0 attr 4.63336
makeover 16852 1 0.0 attr 4.61972
hamburger 16993 1 0.0 attr 4.60369
cramp 17033 1 0.0 attr 4.59917
blower 17045 1 0.0 attr 4.59781
snag 17206 1 0.0 attr 4.57974
skyline 17413 1 0.0 attr 4.55675
rave 17484 1 0.0 attr 4.54893
ass 120330 2 0.3 attr 4.53919
breakout 17707 1 0.0 attr 4.52459
screwdriver 17711 1 0.0 attr 4.52415
crack 121117 2 0.3 attr 4.51666
blunder 17908 1 0.0 attr 4.50292
amp 121674 2 0.3 attr 4.50081
blister 18129 1 0.0 attr 4.47938
mercenary 18225 1 0.0 attr 4.46925
manual 123548 2 0.3 attr 4.44811
relapse 18658 1 0.0 attr 4.42423
pivot 18742 1 0.0 attr 4.41563
tip 498525 4 1.1 attr 4.41383
wildfire 18793 1 0.0 attr 4.41042
puddle 18816 1 0.0 attr 4.40808
meme 18838 1 0.0 attr 4.40584
powerhouse 18911 1 0.0 attr 4.39843
suit 293658 3 0.7 attr 4.39826
wickedness 19121 1 0.0 attr 4.37729
license 297654 3 0.7 attr 4.33515
bigot 19604 1 0.0 attr 4.32957
kick 128027 2 0.3 attr 4.32582
birdie 19730 1 0.0 attr 4.31733
gathering 128887 2 0.3 attr 4.30291
robot 129165 2 0.3 attr 4.29554
side-effect 20039 1 0.0 attr 4.28764
ploy 20183 1 0.0 attr 4.27396
split 130163 2 0.3 attr 4.26925
clutter 20364 1 0.0 attr 4.25692
jihad 20494 1 0.0 attr 4.24478
slump 20535 1 0.0 attr 4.24097
slug 20580 1 0.0 attr 4.23679
typeface 20639 1 0.0 attr 4.23133
ill 20785 1 0.0 attr 4.21789
launcher 20863 1 0.0 attr 4.21075
orgasm 20912 1 0.0 attr 4.20628
vortex 21014 1 0.0 attr 4.19701
attack 1010435 6 2.3 attr 4.19148
apology 133316 2 0.3 attr 4.18770
calamity 21308 1 0.0 attr 4.17054
payout 21503 1 0.0 attr 4.15320
talk 759566 5 1.7 attr 4.15210
provocation 21535 1 0.0 attr 4.15037
crackdown 21559 1 0.0 attr 4.14825
parachute 21933 1 0.0 attr 4.11553
restoration 136187 2 0.3 attr 4.11539
raffle 22097 1 0.0 attr 4.10137
story 2173180 10 4.9 attr 4.08914
holocaust 22254 1 0.1 attr 4.08792
campaign 1025257 6 2.3 attr 4.08341
perch 22401 1 0.1 attr 4.07541
payoff 22417 1 0.1 attr 4.07406
lib 22540 1 0.1 attr 4.06367
conduit 22598 1 0.1 attr 4.05879
axiom 22783 1 0.1 attr 4.04331
rift 22796 1 0.1 attr 4.04223
slap 22947 1 0.1 attr 4.02971
museum 318991 3 0.7 attr 4.01578
retail 23144 1 0.1 attr 4.01350
film 2192482 10 4.9 attr 3.99909
hill 321486 3 0.7 attr 3.98026
tweak 23657 1 0.1 attr 3.97196
farce 23732 1 0.1 attr 3.96596
giveaway 23768 1 0.1 attr 3.96309
infection 323468 3 0.7 attr 3.95230
fiddle 24097 1 0.1 attr 3.93708
job 2832107 12 6.4 attr 3.93675
wig 24175 1 0.1 attr 3.93097
intrigue 24381 1 0.1 attr 3.91493
swan 24541 1 0.1 attr 3.90256
flag 327187 3 0.7 attr 3.90045
upheaval 24827 1 0.1 attr 3.88068
lobster 24835 1 0.1 attr 3.88007
compound 146133 2 0.3 attr 3.87820
plug-in 24872 1 0.1 attr 3.87726
extremism 24884 1 0.1 attr 3.87635
hypocrite 24896 1 0.1 attr 3.87544
bun 24960 1 0.1 attr 3.87059
quake 25076 1 0.1 attr 3.86184
typo 25208 1 0.1 attr 3.85193
iceberg 25330 1 0.1 attr 3.84283
accelerator 25388 1 0.1 attr 3.83851 ns
gimmick 25527 1 0.1 attr 3.82822 ns
airport 333781 3 0.8 attr 3.81040 ns
tragedy 149375 2 0.3 attr 3.80502 ns
hex 25909 1 0.1 attr 3.80023 ns
risk 1640802 8 3.7 attr 3.75645 ns
pneumonia 26750 1 0.1 attr 3.74013 ns
turnaround 27055 1 0.1 attr 3.71882 ns
chant 27492 1 0.1 attr 3.68874 ns
fraternity 27553 1 0.1 attr 3.68459 ns
ADHD 27685 1 0.1 attr 3.67562 ns
investigation 569043 4 1.3 attr 3.67271 ns
cruiser 27959 1 0.1 attr 3.65716 ns
riff 28010 1 0.1 attr 3.65374 ns
crosse 28314 1 0.1 attr 3.63352 ns
banquet 28382 1 0.1 attr 3.62903 ns
ulcer 28421 1 0.1 attr 3.62646 ns
divorce 158522 2 0.4 attr 3.60846 ns
circle 349976 3 0.8 attr 3.59900 ns
bias 159318 2 0.4 attr 3.59200 ns
annoyance 28967 1 0.1 attr 3.59086 ns
carnival 29028 1 0.1 attr 3.58692 ns
exercise 578681 4 1.3 attr 3.58171 ns
equation 160815 2 0.4 attr 3.56133 ns
necklace 29451 1 0.1 attr 3.55989 ns
cartel 29532 1 0.1 attr 3.55477 ns
cookbook 29680 1 0.1 attr 3.54543 ns
scooter 29887 1 0.1 attr 3.53246 ns
curry 30136 1 0.1 attr 3.51699 ns
sprint 30525 1 0.1 attr 3.49309 ns
venture 166376 2 0.4 attr 3.45038 ns
crossover 31259 1 0.1 attr 3.44887 ns
surprise 362314 3 0.8 attr 3.44664 ns
perfume 31343 1 0.1 attr 3.44388 ns
moron 31644 1 0.1 attr 3.42612 ns
mural 31675 1 0.1 attr 3.42430 ns
cleansing 31703 1 0.1 attr 3.42266 ns
workstation 31940 1 0.1 attr 3.40883 ns
pathology 32242 1 0.1 attr 3.39137 ns
traitor 32289 1 0.1 attr 3.38867 ns
string 367861 3 0.8 attr 3.38044 ns
goody 32589 1 0.1 attr 3.37152 ns
knob 32604 1 0.1 attr 3.37067 ns
flute 32635 1 0.1 attr 3.36891 ns
intro 32885 1 0.1 attr 3.35477 ns
erection 32988 1 0.1 attr 3.34898 ns
double 33035 1 0.1 attr 3.34634 ns
loophole 33170 1 0.1 attr 3.33880 ns
shutdown 33197 1 0.1 attr 3.33729 ns
plaster 33338 1 0.1 attr 3.32945 ns
caveat 33392 1 0.1 attr 3.32645 ns
correctness 33395 1 0.1 attr 3.32629 ns
schema 33493 1 0.1 attr 3.32087 ns
terrorism 173274 2 0.4 attr 3.31893 ns
flare 33908 1 0.1 attr 3.29811 ns
pilgrimage 33956 1 0.1 attr 3.29549 ns
rag 34610 1 0.1 attr 3.26028 ns
charger 34695 1 0.1 attr 3.25576 ns
disco 34839 1 0.1 attr 3.24812 ns
symmetry 34955 1 0.1 attr 3.24200 ns
fallacy 35083 1 0.1 attr 3.23526 ns
fail 35508 1 0.1 attr 3.21309 ns
down 35583 1 0.1 attr 3.20921 ns
plane 383013 3 0.9 attr 3.20645 ns
complex 179661 2 0.4 attr 3.20288 ns
mortgage 384749 3 0.9 attr 3.18712 ns
flick 36173 1 0.1 attr 3.17898 ns
kettle 36401 1 0.1 attr 3.16744 ns
tournament 181889 2 0.4 attr 3.16361 ns
theorem 36805 1 0.1 attr 3.14718 ns
tornado 36831 1 0.1 attr 3.14588 ns
rash 37368 1 0.1 attr 3.11935 ns
climax 37507 1 0.1 attr 3.11255 ns
crane 37619 1 0.1 attr 3.10709 ns
mantra 37717 1 0.1 attr 3.10233 ns
relic 38255 1 0.1 attr 3.07643 ns
flap 38618 1 0.1 attr 3.05917 ns
stick 188383 2 0.4 attr 3.05252 ns
temple 189191 2 0.4 attr 3.03903 ns
mast 39586 1 0.1 attr 3.01401 ns
yacht 39763 1 0.1 attr 3.00589 ns
mitigation 39789 1 0.1 attr 3.00470 ns
bible 39972 1 0.1 attr 2.99634 ns
misconception 40073 1 0.1 attr 2.99175 ns
pudding 40097 1 0.1 attr 2.99066 ns
interrogation 40349 1 0.1 attr 2.97927 ns
saw 40527 1 0.1 attr 2.97126 ns
sweep 40630 1 0.1 attr 2.96665 ns
stupidity 40938 1 0.1 attr 2.95293 ns
wedge 41042 1 0.1 attr 2.94833 ns
linkage 41520 1 0.1 attr 2.92732 ns
bully 42211 1 0.1 attr 2.89742 ns
rogue 42506 1 0.1 attr 2.88482 ns
aquarium 42771 1 0.1 attr 2.87358 ns
misuse 43027 1 0.1 attr 2.86280 ns
steroid 43422 1 0.1 attr 2.84630 ns
mound 43683 1 0.1 attr 2.83549 ns
shortcut 43794 1 0.1 attr 2.83091 ns
troll 44063 1 0.1 attr 2.81987 ns
hack 44368 1 0.1 attr 2.80745 ns
reversal 44521 1 0.1 attr 2.80126 ns
pedal 44691 1 0.1 attr 2.79440 ns
carrier 205877 2 0.5 attr 2.77604 ns
kill 45210 1 0.1 attr 2.77364 ns
attainment 45368 1 0.1 attr 2.76738 ns
reunion 45924 1 0.1 attr 2.74552 ns
racist 46097 1 0.1 attr 2.73878 ns
capsule 46235 1 0.1 attr 2.73342 ns
offensive 46679 1 0.1 attr 2.71630 ns
refurbishment 46817 1 0.1 attr 2.71102 ns
staircase 46837 1 0.1 attr 2.71025 ns
competition 686798 4 1.5 attr 2.69789 ns
prostitute 47236 1 0.1 attr 2.69509 ns
art 1561601 7 3.5 attr 2.67595 ns
attraction 212867 2 0.5 attr 2.67395 ns
Sunday 48047 1 0.1 attr 2.66469 ns
toast 48156 1 0.1 attr 2.66065 ns
hat 213810 2 0.5 attr 2.66051 ns
downside 48625 1 0.1 attr 2.64337 ns
wrap 49274 1 0.1 attr 2.61978 ns
kiss 49788 1 0.1 attr 2.60134 ns
slaughter 49972 1 0.1 attr 2.59479 ns
streak 50519 1 0.1 attr 2.57548 ns
buffet 50656 1 0.1 attr 2.57068 ns
catastrophe 50750 1 0.1 attr 2.56739 ns
roundabout 50924 1 0.1 attr 2.56133 ns
bust 51422 1 0.1 attr 2.54411 ns
horror 222592 2 0.5 attr 2.53903 ns
haven 51796 1 0.1 attr 2.53130 ns
narrative 224165 2 0.5 attr 2.51794 ns
robbery 52274 1 0.1 attr 2.51508 ns
chick 52887 1 0.1 attr 2.49452 ns
misunderstanding 52914 1 0.1 attr 2.49362 ns
ram 53172 1 0.1 attr 2.48505 ns
episode 456506 3 1.0 attr 2.48399 ns
bug 227105 2 0.5 attr 2.47905 ns
bonus 227362 2 0.5 attr 2.47569 ns
fishery 53906 1 0.1 attr 2.46094 ns
fort 53934 1 0.1 attr 2.46002 ns
swap 53964 1 0.1 attr 2.45905 ns
hedge 54343 1 0.1 attr 2.44676 ns
calling 54375 1 0.1 attr 2.44572 ns
poll 230039 2 0.5 attr 2.44091 ns
skeleton 54677 1 0.1 attr 2.43600 ns
demonstration 230975 2 0.5 attr 2.42888 ns
outing 55288 1 0.1 attr 2.41653 ns
anomaly 55317 1 0.1 attr 2.41561 ns
arch 55320 1 0.1 attr 2.41552 ns
ramp 55678 1 0.1 attr 2.40423 ns
burger 56244 1 0.1 attr 2.38655 ns
promoter 56782 1 0.1 attr 2.36993 ns
tackle 56890 1 0.1 attr 2.36661 ns
shopper 57003 1 0.1 attr 2.36315 ns
trick 236354 2 0.5 attr 2.36101 ns
collection 1023278 5 2.3 attr 2.35841 ns
pillow 57667 1 0.1 attr 2.34298 ns
reconciliation 58409 1 0.1 attr 2.32075 ns
bang 58923 1 0.1 attr 2.30554 ns
hose 58962 1 0.1 attr 2.30439 ns
truck 241934 2 0.5 attr 2.29279 ns
wardrobe 59414 1 0.1 attr 2.29115 ns
handset 60220 1 0.1 attr 2.26783 ns
call 1041311 5 2.3 attr 2.26486 ns
trio 60359 1 0.1 attr 2.26385 ns
curse 60550 1 0.1 attr 2.25839 ns
virus 246503 2 0.6 attr 2.23852 ns
communism 61352 1 0.1 attr 2.23568 ns
spectacle 61555 1 0.1 attr 2.22999 ns
chord 61597 1 0.1 attr 2.22881 ns
magnet 61631 1 0.1 attr 2.22786 ns
metric 61920 1 0.1 attr 2.21981 ns
cuisine 62842 1 0.1 attr 2.19440 ns
ocean 250483 2 0.6 attr 2.19236 ns
turkey 62923 1 0.1 attr 2.19219 ns
sleep 250851 2 0.6 attr 2.18815 ns
launch 253063 2 0.6 attr 2.16298 ns
torch 64032 1 0.1 attr 2.16224 ns
dive 64203 1 0.1 attr 2.15767 ns
beverage 64261 1 0.1 attr 2.15613 ns
compiler 64299 1 0.1 attr 2.15511 ns
revival 64844 1 0.1 attr 2.14069 ns
scrap 65292 1 0.1 attr 2.12893 ns
slogan 65483 1 0.1 attr 2.12395 ns
faction 66595 1 0.1 attr 2.09528 ns
pavement 66675 1 0.2 attr 2.09324 ns
boss 260994 2 0.6 attr 2.07525 ns
breakthrough 67666 1 0.2 attr 2.06819 ns
villain 68554 1 0.2 attr 2.04611 ns
chart 263930 2 0.6 attr 2.04372 ns
sickness 68726 1 0.2 attr 2.04188 ns
detector 69253 1 0.2 attr 2.02897 ns
commercial 69509 1 0.2 attr 2.02274 ns
plugin 69857 1 0.2 attr 2.01432 ns
soundtrack 69950 1 0.2 attr 2.01208 ns
pillar 70357 1 0.2 attr 2.00231 ns
witch 71094 1 0.2 attr 1.98478 ns
intersection 71110 1 0.2 attr 1.98441 ns
cleaner 71524 1 0.2 attr 1.97466 ns
low 71592 1 0.2 attr 1.97307 ns
briefing 71830 1 0.2 attr 1.96750 ns
program 2753536 10 6.2 attr 1.96548 ns
hole 523770 3 1.2 attr 1.96200 ns
shark 72269 1 0.2 attr 1.95729 ns
filmmaker 72366 1 0.2 attr 1.95504 ns
sovereignty 72590 1 0.2 attr 1.94987 ns
stereotype 72687 1 0.2 attr 1.94764 ns
persecution 72823 1 0.2 attr 1.94451 ns
thread 528257 3 1.2 attr 1.93101 ns
burn 73559 1 0.2 attr 1.92771 ns
habit 275836 2 0.6 attr 1.92081 ns
hammer 74115 1 0.2 attr 1.91515 ns
resource 1429502 6 3.2 attr 1.91351 ns
taxation 74485 1 0.2 attr 1.90686 ns
ridge 75602 1 0.2 attr 1.88212 ns
collaboration 282528 2 0.6 attr 1.85505 ns
leap 78801 1 0.2 attr 1.81363 ns
gem 79253 1 0.2 attr 1.80423 ns
clash 79364 1 0.2 attr 1.80193 ns
cocktail 80147 1 0.2 attr 1.78582 ns
prophecy 80294 1 0.2 attr 1.78281 ns
dawn 80571 1 0.2 attr 1.77717 ns
drill 80799 1 0.2 attr 1.77255 ns
bar 841077 4 1.9 attr 1.77101 ns
display 552757 3 1.2 attr 1.76926 ns
portal 82284 1 0.2 attr 1.74281 ns
quotation 82355 1 0.2 attr 1.74140 ns
funeral 82363 1 0.2 attr 1.74125 ns
mutation 82687 1 0.2 attr 1.73485 ns
feast 83199 1 0.2 attr 1.72481 ns
lesson 560202 3 1.3 attr 1.72250 ns
worm 84520 1 0.2 attr 1.69925 ns
disruption 84544 1 0.2 attr 1.69879 ns
settlement 301010 2 0.7 attr 1.68477 ns
mapping 85695 1 0.2 attr 1.67693 ns
bush 85987 1 0.2 attr 1.67144 ns
axis 86465 1 0.2 attr 1.66250 ns
shoot 86983 1 0.2 attr 1.65289 ns
audio 87233 1 0.2 attr 1.64828 ns
corporation 306830 2 0.7 attr 1.63437 ns
vulnerability 88669 1 0.2 attr 1.62208 ns
assumption 308674 2 0.7 attr 1.61870 ns
litigation 88874 1 0.2 attr 1.61839 ns
promise 308762 2 0.7 attr 1.61796 ns
extreme 89659 1 0.2 attr 1.60433 ns
essay 311604 2 0.7 attr 1.59410 ns
statement 1193356 5 2.7 attr 1.58614 ns
palm 91331 1 0.2 attr 1.57491 ns
insurer 92291 1 0.2 attr 1.55832 ns
retreat 92827 1 0.2 attr 1.54915 ns
archives 92846 1 0.2 attr 1.54882 ns
herb 92923 1 0.2 attr 1.54751 ns
blast 93264 1 0.2 attr 1.54172 ns
referendum 93547 1 0.2 attr 1.53694 ns
spin 94172 1 0.2 attr 1.52643 ns
contradiction 94891 1 0.2 attr 1.51446 ns
monopoly 94993 1 0.2 attr 1.51277 ns
onion 95828 1 0.2 attr 1.49902 ns
accent 95848 1 0.2 attr 1.49869 ns
warrior 95990 1 0.2 attr 1.49637 ns
nonsense 97175 1 0.2 attr 1.47717 ns
cage 97755 1 0.2 attr 1.46788 ns
cure 97873 1 0.2 attr 1.46600 ns
nest 98564 1 0.2 attr 1.45504 ns
snack 98843 1 0.2 attr 1.45064 ns
lobby 99340 1 0.2 attr 1.44284 ns
translation 331753 2 0.7 attr 1.43416 ns
expedition 100055 1 0.2 attr 1.43172 ns
generator 100234 1 0.2 attr 1.42895 ns
handle 101467 1 0.2 attr 1.41005 ns
raid 101637 1 0.2 attr 1.40747 ns
wait 101650 1 0.2 attr 1.40727 ns
bankruptcy 102803 1 0.2 attr 1.38990 ns
card 1252691 5 2.8 attr 1.36791 ns
honey 105838 1 0.2 attr 1.34537 ns
high 106096 1 0.2 attr 1.34167 ns
role 1942801 7 4.4 attr 1.33317 ns
withdrawal 107013 1 0.2 attr 1.32858 ns
breakdown 107036 1 0.2 attr 1.32826 ns
threat 631556 3 1.4 attr 1.32431 ns
stack 109029 1 0.2 attr 1.30033 ns
pleasure 350326 2 0.8 attr 1.29987 ns
cruise 109370 1 0.2 attr 1.29562 ns
tool 1274106 5 2.9 attr 1.29478 ns
rush 109524 1 0.2 attr 1.29350 ns
brief 109724 1 0.2 attr 1.29075 ns
builder 110149 1 0.2 attr 1.28493 ns
breakfast 354647 2 0.8 attr 1.27029 ns
local 111866 1 0.3 attr 1.26173 ns
supervisor 112046 1 0.3 attr 1.25932 ns
tomato 112671 1 0.3 attr 1.25101 ns
chaos 112767 1 0.3 attr 1.24974 ns
subsidy 113035 1 0.3 attr 1.24620 ns
killing 113442 1 0.3 attr 1.24084 ns
flaw 114000 1 0.3 attr 1.23354 ns
junction 115044 1 0.3 attr 1.22001 ns
hub 115087 1 0.3 attr 1.21945 ns
wake 115173 1 0.3 attr 1.21835 ns
pipeline 115198 1 0.3 attr 1.21802 ns
archive 116250 1 0.3 attr 1.20458 ns
injection 116373 1 0.3 attr 1.20302 ns
dance 372629 2 0.8 attr 1.15339 ns
wedding 373060 2 0.8 attr 1.15070 ns
shot 668296 3 1.5 attr 1.15040 ns
escape 120760 1 0.3 attr 1.14876 ns
beauty 373430 2 0.8 attr 1.14840 ns
theft 121314 1 0.3 attr 1.14210 ns
hook 124159 1 0.3 attr 1.10854 ns
strategy 1000104 4 2.3 attr 1.10129 ns
bulb 124799 1 0.3 attr 1.10114 ns
shortage 124929 1 0.3 attr 1.09964 ns
remedy 125470 1 0.3 attr 1.09344 ns
framework 385069 2 0.9 attr 1.07802 ns
auction 126835 1 0.3 attr 1.07794 ns
console 127150 1 0.3 attr 1.07440 ns
throat 127298 1 0.3 attr 1.07274 ns
comic 127335 1 0.3 attr 1.07232 ns
rhythm 127463 1 0.3 attr 1.07089 ns
correction 128033 1 0.3 attr 1.06453 ns
barrel 129326 1 0.3 attr 1.05026 ns
frustration 130015 1 0.3 attr 1.04273 ns
miracle 130389 1 0.3 attr 1.03867 ns
worry 130491 1 0.3 attr 1.03757 ns
excess 130779 1 0.3 attr 1.03445 ns
summit 131049 1 0.3 attr 1.03154 ns
jump 131091 1 0.3 attr 1.03109 ns
idiot 131779 1 0.3 attr 1.02372 ns
rod 132075 1 0.3 attr 1.02056 ns
pile 132674 1 0.3 attr 1.01421 ns
upgrade 132744 1 0.3 attr 1.01347 ns
healing 132888 1 0.3 attr 1.01195 ns
slope 133212 1 0.3 attr 1.00854 ns
ritual 133276 1 0.3 attr 1.00787 ns
shit 134127 1 0.3 attr 0.99897 ns
encounter 134341 1 0.3 attr 0.99674 ns
profile 401055 2 0.9 attr 0.98728 ns
controller 136497 1 0.3 attr 0.97461 ns
flash 137310 1 0.3 attr 0.96639 ns
bench 138789 1 0.3 attr 0.95162 ns
commodity 139698 1 0.3 attr 0.94265 ns
journalism 140463 1 0.3 attr 0.93518 ns
driving 140746 1 0.3 attr 0.93243 ns
debt 725919 3 1.6 attr 0.91352 ns
workshop 416293 2 0.9 attr 0.90671 ns
blessing 144392 1 0.3 attr 0.89769 ns
honour 145022 1 0.3 attr 0.89182 ns
treaty 145690 1 0.3 attr 0.88563 ns
vacation 145802 1 0.3 attr 0.88460 ns
instrument 421053 2 0.9 attr 0.88266 ns
taxi 146319 1 0.3 attr 0.87985 ns
blow 148196 1 0.3 attr 0.86281 ns
controversy 149243 1 0.3 attr 0.85344 ns
juice 149655 1 0.3 attr 0.84978 ns
debate 744894 3 1.7 attr 0.84411 ns
explosion 152063 1 0.3 attr 0.82870 ns
cow 152859 1 0.3 attr 0.82184 ns
desert 153648 1 0.3 attr 0.81509 ns
experiment 436124 2 1.0 attr 0.80983 ns
trust 436580 2 1.0 attr 0.80770 ns
constraint 154600 1 0.3 attr 0.80702 ns
capitalism 154716 1 0.3 attr 0.80605 ns
solicitor 154940 1 0.3 attr 0.80416 ns
session 756567 3 1.7 attr 0.80335 ns
breach 155313 1 0.3 attr 0.80103 ns
celebrity 155330 1 0.3 attr 0.80089 ns
killer 156029 1 0.4 attr 0.79506 ns
terror 156068 1 0.4 attr 0.79473 ns
sentence 440439 2 1.0 attr 0.78987 ns
manuscript 157696 1 0.4 attr 0.78131 ns
invasion 159647 1 0.4 attr 0.76550 ns
revelation 159820 1 0.4 attr 0.76411 ns
garden 771523 3 1.7 attr 0.75322 ns
knife 161595 1 0.4 attr 0.75001 ns
accident 451655 2 1.0 attr 0.73977 ns
pin 163138 1 0.4 attr 0.73795 ns
headline 163678 1 0.4 attr 0.73377 ns
documentary 165044 1 0.4 attr 0.72330 ns
judgement 165458 1 0.4 attr 0.72015 ns
objection 166669 1 0.4 attr 0.71102 ns
song 1130049 4 2.5 attr 0.70889 ns
deck 168043 1 0.4 attr 0.70079 ns
exchange 461558 2 1.0 attr 0.69759 ns
camp 463571 2 1.0 attr 0.68925 ns
printer 171003 1 0.4 attr 0.67919 ns
necessity 171957 1 0.4 attr 0.67236 ns
deadline 174093 1 0.4 attr 0.65728 ns
coalition 174452 1 0.4 attr 0.65478 ns
creator 174710 1 0.4 attr 0.65298 ns
can 175713 1 0.4 attr 0.64605 ns
tent 176099 1 0.4 attr 0.64340 ns
loop 176503 1 0.4 attr 0.64064 ns
algorithm 178150 1 0.4 attr 0.62947 ns
watch 181136 1 0.4 attr 0.60967 ns
cycle 484448 2 1.1 attr 0.60702 ns
desire 484598 2 1.1 attr 0.60646 ns
commentary 182036 1 0.4 attr 0.60381 ns
retailer 182421 1 0.4 attr 0.60132 ns
ministry 183193 1 0.4 attr 0.59635 ns
calendar 183976 1 0.4 attr 0.59134 ns
holder 185780 1 0.4 attr 0.57995 ns
transformation 186340 1 0.4 attr 0.57645 ns
suite 186467 1 0.4 attr 0.57566 ns
download 187639 1 0.4 attr 0.56841 ns
belt 187672 1 0.4 attr 0.56820 ns
icon 188866 1 0.4 attr 0.56089 ns
indicator 190593 1 0.4 attr 0.55046 ns
patch 191236 1 0.4 attr 0.54662 ns
camera 844741 3 1.9 attr 0.53881 ns
specification 192855 1 0.4 attr 0.53705 ns
complaint 505548 2 1.1 attr 0.53148 ns
pump 193852 1 0.4 attr 0.53123 ns
outlet 194598 1 0.4 attr 0.52690 ns
toy 195490 1 0.4 attr 0.52177 ns
silver 198799 1 0.4 attr 0.50310 ns
export 198914 1 0.4 attr 0.50246 ns
fall 514710 2 1.2 attr 0.50088 ns
anger 199651 1 0.4 attr 0.49838 ns
play 861299 3 1.9 attr 0.49688 ns
punishment 200202 1 0.5 attr 0.49535 ns
signature 202695 1 0.5 attr 0.48182 ns
band 883506 3 2.0 attr 0.44410 ns
resort 210489 1 0.5 attr 0.44145 ns
set 1253824 4 2.8 attr 0.43452 ns
deficit 213406 1 0.5 attr 0.42705 ns
ruling 213532 1 0.5 attr 0.42644 ns
peak 213830 1 0.5 attr 0.42499 ns
tour 541106 2 1.2 attr 0.41966 ns
tear 215102 1 0.5 attr 0.41886 ns
assembly 216250 1 0.5 attr 0.41338 ns
bag 544296 2 1.2 attr 0.41051 ns
occupation 217936 1 0.5 attr 0.40544 ns
chocolate 218632 1 0.5 attr 0.40219 ns
cap 218801 1 0.5 attr 0.40141 ns
comedy 223250 1 0.5 attr 0.38118 ns
doctrine 223749 1 0.5 attr 0.37896 ns
roll 224739 1 0.5 attr 0.37459 ns
drama 226070 1 0.5 attr 0.36877 ns
favour 227710 1 0.5 attr 0.36170 ns
currency 228031 1 0.5 attr 0.36033 ns
limitation 229442 1 0.5 attr 0.35434 ns
reception 229457 1 0.5 attr 0.35428 ns
tag 231025 1 0.5 attr 0.34772 ns
announcement 231829 1 0.5 attr 0.34439 ns
directory 232126 1 0.5 attr 0.34316 ns
offence 232857 1 0.5 attr 0.34017 ns
desk 233178 1 0.5 attr 0.33886 ns
cake 233279 1 0.5 attr 0.33845 ns
corner 573649 2 1.3 attr 0.33255 ns
profession 237531 1 0.5 attr 0.32146 ns
dream 579428 2 1.3 attr 0.31847 ns
offering 239653 1 0.5 attr 0.31323 ns
utility 241900 1 0.5 attr 0.30468 ns
concert 245194 1 0.6 attr 0.29246 ns
dealer 245230 1 0.6 attr 0.29233 ns
project 2920737 8 6.6 attr 0.28898 ns
pitch 249776 1 0.6 attr 0.27606 ns
birthday 252338 1 0.6 attr 0.26718 ns
challenge 976652 3 2.2 attr 0.26201 ns
cut 607014 2 1.4 attr 0.25662 ns
dish 259180 1 0.6 attr 0.24448 ns
law 2970638 8 6.7 attr 0.24254 ns
tape 263466 1 0.6 attr 0.23097 ns
award 619816 2 1.4 attr 0.23077 ns
device 1004035 3 2.3 attr 0.21937 ns
buyer 268508 1 0.6 attr 0.21576 ns
summary 274222 1 0.6 attr 0.19937 ns
programme 1018141 3 2.3 attr 0.19917 ns
statistic 274321 1 0.6 attr 0.19909 ns
lens 275156 1 0.6 attr 0.19677 ns
expansion 276728 1 0.6 attr 0.19246 ns
chip 277818 1 0.6 attr 0.18950 ns
fiction 279646 1 0.6 attr 0.18462 ns
entity 279823 1 0.6 attr 0.18415 ns
critic 286287 1 0.6 attr 0.16757 ns
kit 287625 1 0.6 attr 0.16427 ns
force 1441727 4 3.2 attr 0.16322 ns
disorder 289519 1 0.7 attr 0.15967 ns
regulation 664690 2 1.5 attr 0.15319 ns
discovery 292330 1 0.7 attr 0.15300 ns
test 1453453 4 3.3 attr 0.15121 ns
examination 293252 1 0.7 attr 0.15086 ns
transaction 293346 1 0.7 attr 0.15064 ns
symbol 297287 1 0.7 attr 0.14169 ns
stand 298134 1 0.7 attr 0.13982 ns
motor 299186 1 0.7 attr 0.13751 ns
trail 300373 1 0.7 attr 0.13493 ns
nation 1070592 3 2.4 attr 0.13389 ns
venue 301679 1 0.7 attr 0.13214 ns
obligation 302200 1 0.7 attr 0.13103 ns
guideline 302697 1 0.7 attr 0.12998 ns
substance 303715 1 0.7 attr 0.12785 ns
operation 1077467 3 2.4 attr 0.12644 ns
enterprise 306978 1 0.7 attr 0.12117 ns
document 1084146 3 2.4 attr 0.11943 ns
president 691318 2 1.6 attr 0.11596 ns
browser 313703 1 0.7 attr 0.10810 ns
boundary 315862 1 0.7 attr 0.10411 ns
moment 1507095 4 3.4 attr 0.10276 ns
warning 318576 1 0.7 attr 0.09921 ns
bone 321606 1 0.7 attr 0.09392 ns
organization 1121667 3 2.5 attr 0.08421 ns
shopping 329217 1 0.7 attr 0.08141 ns
surgery 329260 1 0.7 attr 0.08134 ns
core 329895 1 0.7 attr 0.08035 ns
domain 330398 1 0.7 attr 0.07957 ns
movement 1127561 3 2.5 attr 0.07930 ns
roof 331046 1 0.7 attr 0.07857 ns
bond 333727 1 0.8 attr 0.07450 ns
intervention 333741 1 0.8 attr 0.07448 ns
department 729436 2 1.6 attr 0.07288 ns
exhibition 335194 1 0.8 attr 0.07234 ns
agent 731588 2 1.6 attr 0.07079 ns
plant 1143070 3 2.6 attr 0.06716 ns
studio 341799 1 0.8 attr 0.06305 ns
interview 740088 2 1.7 attr 0.06285 ns
agenda 342955 1 0.8 attr 0.06150 ns
sky 344377 1 0.8 attr 0.05963 ns
stream 344385 1 0.8 attr 0.05962 ns
installation 345561 1 0.8 attr 0.05809 ns
recipe 346252 1 0.8 attr 0.05721 ns
murder 347936 1 0.8 attr 0.05509 ns
prayer 351819 1 0.8 attr 0.05037 ns
default 352247 1 0.8 attr 0.04987 ns
sequence 355402 1 0.8 attr 0.04624 ns
saving 355519 1 0.8 attr 0.04611 ns
achievement 355966 1 0.8 attr 0.04561 ns
module 356801 1 0.8 attr 0.04468 ns
email 762871 2 1.7 attr 0.04415 ns
quote 358228 1 0.8 attr 0.04312 ns
stop 358336 1 0.8 attr 0.04301 ns
average 358804 1 0.8 attr 0.04250 ns
target 768033 2 1.7 attr 0.04041 ns
library 769194 2 1.7 attr 0.03960 ns
institution 769238 2 1.7 attr 0.03957 ns
intention 364754 1 0.8 attr 0.03640 ns
trial 777998 2 1.8 attr 0.03371 ns
interface 367794 1 0.8 attr 0.03349 ns
PC 373950 1 0.8 attr 0.02801 ns
line 2918192 7 6.6 attr 0.02763 ns
gap 377762 1 0.9 attr 0.02488 ns
feedback 378395 1 0.9 attr 0.02439 ns
diet 378653 1 0.9 attr 0.02418 ns
sheet 380386 1 0.9 attr 0.02285 ns
introduction 380565 1 0.9 attr 0.02272 ns
bridge 385447 1 0.9 attr 0.01921 ns
egg 385602 1 0.9 attr 0.01910 ns
thinking 387039 1 0.9 attr 0.01813 ns
philosophy 389336 1 0.9 attr 0.01664 ns
tax 1237521 3 2.8 attr 0.01604 ns
politician 390466 1 0.9 attr 0.01593 ns
dinner 395745 1 0.9 attr 0.01284 ns
chain 399181 1 0.9 attr 0.01102 ns
forest 399772 1 0.9 attr 0.01072 ns
agreement 825615 2 1.9 attr 0.01048 ns
technology 1692597 4 3.8 attr 0.00928 ns
coverage 407680 1 0.9 attr 0.00715 ns
column 409932 1 0.9 attr 0.00627 ns
edition 413714 1 0.9 attr 0.00493 ns
crowd 417911 1 0.9 attr 0.00365 ns
literature 418156 1 0.9 attr 0.00358 ns
association 419027 1 0.9 attr 0.00334 ns
screen 856251 2 1.9 attr 0.00268 ns
photograph 422438 1 1.0 attr 0.00248 ns
principle 858449 2 1.9 attr 0.00232 ns
charity 427075 1 1.0 attr 0.00152 ns
bike 429560 1 1.0 attr 0.00111 ns
metal 429904 1 1.0 attr 0.00106 ns
review 1323143 3 3.0 attr 0.00015 ns
root 439931 1 1.0 attr 0.00009 ns
suggestion 441317 1 1.0 attr 0.00004 ns
wheel 443722 1 1.0 attr 0.00000 ns
load 448582 1 1.0 rep 0.00010 ns
phone 1351135 3 3.0 rep 0.00058 ns
act 903783 2 2.0 rep 0.00059 ns
incident 459591 1 1.0 rep 0.00118 ns
rise 460675 1 1.0 rep 0.00134 ns
scene 923399 2 2.1 rep 0.00303 ns
track 929972 2 2.1 rep 0.00425 ns
distribution 475388 1 1.1 rep 0.00472 ns
mark 475488 1 1.1 rep 0.00474 ns
picture 1390224 3 3.1 rep 0.00546 ns
beach 478935 1 1.1 rep 0.00582 ns
developer 482229 1 1.1 rep 0.00694 ns
link 1401112 3 3.2 rep 0.00768 ns
photo 955072 2 2.2 rep 0.01074 ns
boy 958043 2 2.2 rep 0.01170 ns
journey 494300 1 1.1 rep 0.01185 ns
occasion 495300 1 1.1 rep 0.01231 ns
mine 495834 1 1.1 rep 0.01256 ns
ad 496761 1 1.1 rep 0.01300 ns
farm 497603 1 1.1 rep 0.01340 ns
wall 967701 2 2.2 rep 0.01506 ns
combination 502779 1 1.1 rep 0.01601 ns
meal 502973 1 1.1 rep 0.01611 ns
truth 973427 2 2.2 rep 0.01725 ns
creation 506144 1 1.1 rep 0.01782 ns
guide 511190 1 1.2 rep 0.02070 ns
chair 515546 1 1.2 rep 0.02334 ns
format 516699 1 1.2 rep 0.02407 ns
identity 516861 1 1.2 rep 0.02417 ns
justice 518297 1 1.2 rep 0.02509 ns
magazine 522212 1 1.2 rep 0.02767 ns
winter 522826 1 1.2 rep 0.02808 ns
employment 529672 1 1.2 rep 0.03289 ns
block 537311 1 1.2 rep 0.03865 ns
newspaper 538076 1 1.2 rep 0.03925 ns
consequence 538243 1 1.2 rep 0.03938 ns
balance 549348 1 1.2 rep 0.04854 ns
flight 551006 1 1.2 rep 0.04998 ns
server 555309 1 1.3 rep 0.05379 ns
code 1044269 2 2.4 rep 0.05524 ns
language 1524438 3 3.4 rep 0.05685 ns
object 1049923 2 2.4 rep 0.05910 ns
forum 562226 1 1.3 rep 0.06018 ns
provider 562832 1 1.3 rep 0.06076 ns
marketing 564228 1 1.3 rep 0.06209 ns
skin 571744 1 1.3 rep 0.06947 ns
conversation 574894 1 1.3 rep 0.07266 ns
limit 589910 1 1.3 rep 0.08871 ns
father 1097050 2 2.5 rep 0.09569 ns
vote 599352 1 1.3 rep 0.09947 ns
effort 1590763 3 3.6 rep 0.09999 ns
speech 601827 1 1.4 rep 0.10237 ns
plan 2077978 4 4.7 rep 0.10354 ns
component 607752 1 1.4 rep 0.10946 ns
temperature 614841 1 1.4 rep 0.11819 ns
turn 619074 1 1.4 rep 0.12352 ns
ship 619244 1 1.4 rep 0.12374 ns
panel 623713 1 1.4 rep 0.12948 ns
theme 624760 1 1.4 rep 0.13084 ns
beginning 625533 1 1.4 rep 0.13185 ns
editor 630874 1 1.4 rep 0.13890 ns
strength 631303 1 1.4 rep 0.13947 ns
rock 635915 1 1.4 rep 0.14568 ns
committee 649117 1 1.5 rep 0.16403 ns
survey 651770 1 1.5 rep 0.16782 ns
unit 1175510 2 2.6 rep 0.17270 ns
restaurant 655528 1 1.5 rep 0.17325 ns
map 659590 1 1.5 rep 0.17918 ns
policy 2214601 4 5.0 rep 0.20941 ns
request 683678 1 1.5 rep 0.21592 ns
drive 687771 1 1.5 rep 0.22241 ns
protection 690636 1 1.6 rep 0.22700 ns
brand 691403 1 1.6 rep 0.22823 ns
idea 3222434 6 7.3 rep 0.23109 ns
bank 1227345 2 2.8 rep 0.23353 ns
freedom 695137 1 1.6 rep 0.23428 ns
look 1229713 2 2.8 rep 0.23648 ns
page 2244788 4 5.1 rep 0.23705 ns
color 702864 1 1.6 rep 0.24696 ns
move 703414 1 1.6 rep 0.24788 ns
weekend 707537 1 1.6 rep 0.25476 ns
sport 715589 1 1.6 rep 0.26838 ns
candidate 716013 1 1.6 rep 0.26911 ns
field 1774150 3 4.0 rep 0.27123 ns
ball 721486 1 1.6 rep 0.27853 ns
spirit 746384 1 1.7 rep 0.32279 ns
solution 1298902 2 2.9 rep 0.32912 ns
letter 1302032 2 2.9 rep 0.33359 ns
reading 755389 1 1.7 rep 0.33936 ns
attempt 760642 1 1.7 rep 0.34916 ns
release 764873 1 1.7 rep 0.35712 ns
technique 765580 1 1.7 rep 0.35845 ns
society 1325576 2 3.0 rep 0.36794 ns
star 770595 1 1.7 rep 0.36798 ns
context 774919 1 1.7 rep 0.37626 ns
advantage 779466 1 1.8 rep 0.38504 ns
feature 1341791 2 3.0 rep 0.39234 ns
pain 787467 1 1.8 rep 0.40064 ns
feeling 794019 1 1.8 rep 0.41358 ns
hospital 795631 1 1.8 rep 0.41678 ns
writer 821080 1 1.8 rep 0.46842 ns
website 1399587 2 3.2 rep 0.48395 ns
ground 1427167 2 3.2 rep 0.53011 ns
earth 852266 1 1.9 rep 0.53432 ns
fund 860028 1 1.9 rep 0.55114 ns
connection 861807 1 1.9 rep 0.55502 ns
study 2524668 4 5.7 rep 0.55768 ns
round 864957 1 1.9 rep 0.56191 ns
piece 1455918 2 3.3 rep 0.57982 ns
organisation 880104 1 2.0 rep 0.59540 ns
concept 886166 1 2.0 rep 0.60898 ns
station 886640 1 2.0 rep 0.61004 ns
word 3094822 5 7.0 rep 0.61769 ns
increase 893261 1 2.0 rep 0.62498 ns
cause 895350 1 2.0 rep 0.62972 ns
relationship 1503438 2 3.4 rep 0.66539 ns
shop 916859 1 2.1 rep 0.67911 ns
variety 920679 1 2.1 rep 0.68800 ns
sign 925028 1 2.1 rep 0.69816 ns
start 926054 1 2.1 rep 0.70057 ns
contract 937276 1 2.1 rep 0.72702 ns
agency 955328 1 2.2 rep 0.77017 ns
summer 967995 1 2.2 rep 0.80087 ns
machine 979519 1 2.2 rep 0.82911 ns
factor 982968 1 2.2 rep 0.83761 ns
medium 1600156 2 3.6 rep 0.85164 ns
case 4849558 8 10.9 rep 0.86245 ns
video 1014287 1 2.3 rep 0.91594 ns
club 1024712 1 2.3 rep 0.94244 ns
road 1646912 2 3.7 rep 0.94705 ns
search 1034257 1 2.3 rep 0.96689 ns
centre 1036799 1 2.3 rep 0.97343 ns
economy 1039246 1 2.3 rep 0.97974 ns
eye 1670707 2 3.8 rep 0.99685 ns
drug 1056407 1 2.4 rep 1.02428 ns
element 1058538 1 2.4 rep 1.02985 ns
web 1068297 1 2.4 rep 1.05545 ns
action 2291328 3 5.2 rep 1.06591 ns
front 1076903 1 2.4 rep 1.07816 ns
oil 1078909 1 2.4 rep 1.08348 ns
record 1719467 2 3.9 rep 1.10143 ns
table 1087260 1 2.4 rep 1.10567 ns
claim 1098223 1 2.5 rep 1.13498 ns
software 1099744 1 2.5 rep 1.13907 ns
paper 1736757 2 3.9 rep 1.13929 ns
item 1104930 1 2.5 rep 1.15302 ns
function 1113272 1 2.5 rep 1.17555 ns
board 1115799 1 2.5 rep 1.18240 ns
culture 1117440 1 2.5 rep 1.18685 ns
point 4569469 7 10.3 rep 1.18722 ns
concern 1161298 1 2.6 rep 1.30740 ns
discussion 1169522 1 2.6 rep 1.33033 ns
content 1195359 1 2.7 rep 1.40301 ns
period 1861604 2 4.2 rep 1.42404 ns
author 1218573 1 2.7 rep 1.46911 ns
decision 1930260 2 4.3 rep 1.58847 ns
environment 1270871 1 2.9 rep 1.62065 ns
date 1276522 1 2.9 rep 1.63723 ns
dog 1284808 1 2.9 rep 1.66162 ns
client 1288547 1 2.9 rep 1.67265 ns
form 2593352 3 5.8 rep 1.68372 ns
security 1294212 1 2.9 rep 1.68939 ns
treatment 1306731 1 2.9 rep 1.72654 ns
article 1997634 2 4.5 rep 1.75478 ns
application 2006190 2 4.5 rep 1.77623 ns
goal 1332778 1 3.0 rep 1.80440 ns
standard 1342762 1 3.0 rep 1.83445 ns
step 1355641 1 3.1 rep 1.87337 ns
example 3290323 4 7.4 rep 1.88826 ns
computer 1387323 1 3.1 rep 1.96990 ns
file 1406021 1 3.2 rep 2.02735 ns
season 1412789 1 3.2 rep 2.04824 ns
town 1423935 1 3.2 rep 2.08274 ns
method 1455289 1 3.3 rep 2.18043 ns
approach 1462758 1 3.3 rep 2.20384 ns
knowledge 1467454 1 3.3 rep 2.21858 ns
choice 1471587 1 3.3 rep 2.23158 ns
series 1478322 1 3.3 rep 2.25279 ns
rule 1509106 1 3.4 rep 2.35026 ns
report 2240605 2 5.0 rep 2.39039 ns
version 1526528 1 3.4 rep 2.40579 ns
process 2953827 3 6.6 rep 2.52719 ns
performance 1572848 1 3.5 rep 2.55470 ns
meeting 1579149 1 3.6 rep 2.57509 ns
history 2389008 2 5.4 rep 2.80259 ns
everything 1670953 1 3.8 rep 2.87570 ns
house 2419875 2 5.4 rep 2.89034 ns
food 2424381 2 5.5 rep 2.90320 ns
industry 1734445 1 3.9 rep 3.08719 ns
building 1734818 1 3.9 rep 3.08844 ns
material 1755358 1 4.0 rep 3.15745 ns
age 1797451 1 4.0 rep 3.29972 ns
space 1809161 1 4.1 rep 3.33950 ns
source 1810002 1 4.1 rep 3.34236 ns
music 1835020 1 4.1 rep 3.42764 ns
condition 1849891 1 4.2 rep 3.47851 ns
model 1854807 1 4.2 rep 3.49535 ns
design 1857967 1 4.2 rep 3.50619 ns
control 1912499 1 4.3 rep 3.69406 ns
rate 1970065 1 4.4 rep 3.89416 *
quality 2018355 1 4.5 rep 4.06335 *
day 8484568 11 19.1 rep 4.07785 *
car 2853424 2 6.4 rep 4.18511 *
activity 2063222 1 4.6 rep 4.22157 *
class 2067493 1 4.7 rep 4.23668 *
water 2930643 2 6.6 rep 4.42638 *
night 2235420 1 5.0 rep 4.83732 *
experience 3112293 2 7.0 rep 5.00469 *
type 2309807 1 5.2 rep 5.10713 *
name 3157412 2 7.1 rep 5.15052 *
course 3917068 3 8.8 rep 5.17588 *
view 2356961 1 5.3 rep 5.27925 *
week 4005095 3 9.0 rep 5.43947 *
right 3258788 2 7.3 rep 5.48115 *
product 2456372 1 5.5 rep 5.64471 *
development 2566496 1 5.8 rep 6.05342 *
hour 2571218 1 5.8 rep 6.07103 *
company 4219869 3 9.5 rep 6.09458 *
research 2702371 1 6.1 rep 6.56277 *
hand 2817215 1 6.3 rep 6.99729 **
state 3715915 2 8.4 rep 7.01730 **
community 2932355 1 6.6 rep 7.43631 **
team 3012198 1 6.8 rep 7.74258 **
power 3096084 1 7.0 rep 8.06590 **
money 3135425 1 7.1 rep 8.21805 **
group 4082803 2 9.2 rep 8.29538 **
school 4202653 2 9.5 rep 8.72026 **
service 5296725 3 11.9 rep 9.58874 **
woman 3567973 1 8.0 rep 9.91031 **
thing 7692246 6 17.3 rep 9.94716 **
member 3775608 1 8.5 rep 10.73375 **
man 5296581 2 11.9 rep 12.73095 ***
place 4952912 1 11.2 rep 15.50339 ****
world 5355120 1 12.1 rep 17.16295 ****
people 14168532 1 31.9 rep 55.10625 *****
year 15249944 1 34.3 rep 59.86710 *****

COCA

# read data
moa <- read_xlsx("../data/motherofall_COCA.xlsx")
# full frequency list cannot be shared publicly
# for license reasons, hence we work with the
# list containing only the lemmas occurring in
# mother of all
#coca <- fread("../coca_2017_lemma_frequency_list.txt", quote = "")
coca <- fread("../data/coca_moa_lemma_frequencies.csv")
# replace whitespaces in column names
colnames(moa) <- gsub(" ", "_", colnames(moa))
colnames(coca) <- c("No", "Lemma", "Freq")
# omit false hits (rows not marked "y" in the manual `keep` column)
moa <- subset(moa, keep == "y")
# types, tokens, and hapax legomena overall:
# one row per lemma with its token frequency, most frequent first
moa_tbl1 <- moa %>%
  select(lemma) %>%
  table %>%
  as_tibble() %>%
  rename(Freq = n) %>%
  arrange(desc(Freq))
tibble(
  Tokens = sum(moa_tbl1$Freq),
  Types = nrow(moa_tbl1),
  "Hapax Legomena" = sum(moa_tbl1$Freq == 1)
)
# generate input for collostructional analysis
# (a) frequency of each lemma inside the construction
moa_lemmas <- moa$lemma %>%
  table %>%
  sort(decreasing = TRUE) %>%
  as.data.frame(stringsAsFactors = FALSE)
colnames(moa_lemmas) <- c("Lemma", "Freq_in_cxn")
# (b) frequency of each lemma in the corpus; select the two columns
# explicitly (the previous `coca[, Lemma, Freq]` passed Freq as the
# data.table `by` argument and only happened to return the same rows)
all_lemmas <- coca[, .(Lemma, Freq)]
# combine (a) and (b); all = FALSE is passed through to join.freqs
collex_input <- join.freqs(moa_lemmas, as.data.frame(all_lemmas), all = FALSE)
colnames(collex_input) <- c("Lemma", "cxn_freq", "cxn_all")
# drop lemmas with a corpus frequency of zero
collex_input <- subset(collex_input, cxn_all != 0)
# simple collexeme analysis; the corpus size is taken as the sum of the
# frequencies in the (reduced) lemma list read above
collex(collex_input, corpsize = sum(coca$Freq)) # %>% write_excel_csv("simple_collexeme_analysis.csv")
# relative frequency ------------------------------------------------------
# total corpus size per year
coca_freq <- read_xlsx("../data/COCA2017_total_frequencies.xlsx")
# hits per year, attached to the yearly corpus totals and
# normalised to a per-million-words rate
moa_tbl <- table(moa$Year) %>%
  as.data.frame(stringsAsFactors = FALSE) %>%
  setNames(c("YEAR", "Freq")) %>%
  mutate(YEAR = as.numeric(YEAR)) %>%
  left_join(coca_freq, ., by = "YEAR") %>%
  mutate(pmw = (Freq / TOTAL) * 1e06)
# scatterplot of the normalised frequency with a linear trend line
# png("mother_of_all_coca_freq.png", width = 6.5, height = 5, un = "in", res = 300)
plot(
  moa_tbl$YEAR, moa_tbl$pmw,
  main = expression(paste("[", italic("mother of all"), " X], COCA")),
  xlab = "Year", ylab = "Frequency per million words",
  col = "blue", pch = 20
)
abline(lm(pmw ~ YEAR, data = moa_tbl), col = "darkgrey", lty = 2)

# dev.off()
# types per decade: number of distinct lemmas (types) and number of
# hits (n = tokens) of the construction in each decade
moa_types <- moa %>%
  group_by(Decade) %>%
  summarise(
    types = n_distinct(lemma),
    n = n()
  )
# types per year
moa_types_year <- moa %>%
  group_by(Year) %>%
  summarise(types = n_distinct(lemma))
# add to table with total frequencies
moa_tbl <- left_join(moa_types_year, moa_tbl, by = c("Year" = "YEAR"))
moa_tbl <- rename(moa_tbl, YEAR = Year)
moa_tbl$types_pmw <- (moa_tbl$types / moa_tbl$TOTAL) * 1e6
# coca_freq per decade: total corpus size per decade
coca_freq$Decade <- floor(coca_freq$YEAR / 10) * 10
coca_freq_decade <- coca_freq %>%
  group_by(Decade) %>%
  summarise(n = sum(TOTAL))
# types per decade, relative to corpus size.
# Both tables have a column `n` (construction tokens vs. corpus size).
# Without an explicit `by`, the join used c("Decade", "n") as keys, so
# no row matched, no corpus size was added, and `rel` ended up
# identical to `ttr`. Join on Decade only and keep both counts apart.
moa_types <- left_join(
  moa_types, coca_freq_decade,
  by = "Decade", suffix = c("", "_corpus")
)
moa_types$rel <- moa_types$types / moa_types$n_corpus
moa_types$rel %>% plot

# type-token ratio within the construction (types / construction tokens)
moa_types$ttr <- moa_types$types / moa_types$n
# distribution of hapaxes
# lemmas attested exactly once in the whole data set
hapaxes <- names(which(table(moa$lemma) == 1))
# flag every hit whose lemma is a hapax
moa$hapax <- ifelse(moa$lemma %in% hapaxes, "y", "n")
# hapax count and token count per decade
moa_hapaxes <- moa %>%
  group_by(Decade) %>%
  summarise(
    hapaxes = sum(hapax == "y"),
    n = n()
  )
# plot potential productivity
# (share of hapax tokens among all tokens of the decade)
moa_hapaxes$pp <- moa_hapaxes$hapaxes / moa_hapaxes$n
# Three-panel figure: (1) token and type frequency per million words by
# year, (2) type-token ratio by decade, (3) proportion of hapaxes by decade.
# png("types_tokens_mother_COCA.png", width = 12, height = 4, units = "in", res = 300)
# Set layout and margins once (after the device is opened) and remember
# the previous settings so they can be restored afterwards instead of
# being overwritten with hard-coded defaults.
op <- par(mfrow = c(1, 3), mar = c(5.1, 5.1, 5.1, 2.1))
# panel 1: tokens per million words (blue dots) with linear trend line,
# overlaid with types per million words (semi-transparent red diamonds)
plot(moa_tbl$YEAR, moa_tbl$pmw, pch = 20, col = "blue",
     ylab = "Frequency per million words", xlab = "Year",
     main = expression(paste(bold("["), bolditalic("mother of all"), bold(" X], COCA"))),
     cex = 2, cex.lab = 2, cex.axis = 1.5)
abline(lm(moa_tbl$pmw ~ moa_tbl$YEAR), lty = 2, col = "darkgrey", lwd = 2)
points(moa_tbl$YEAR, moa_tbl$types_pmw, col = rgb(1, 0, 0, .5), pch = 18, cex = 1.3)
# panel 2: type-token ratio per decade; the x axis is drawn manually
# so that only the three decades are labelled
plot(moa_types$Decade, moa_types$ttr,
     type = "b", pch = 18,
     ylab = "Types / Tokens", xlab = "Decade",
     main = "Type-Token Ratio, COCA",
     lwd = 2, col = "blue", cex = 2, cex.lab = 2, cex.axis = 2,
     xaxt = "n"
     )
axis(1, at = c(1990, 2000, 2010), cex.axis = 2)
# panel 3: potential productivity per decade
plot(moa_hapaxes$Decade, moa_hapaxes$pp, type = "b", pch = 18,
     ylab = "Proportion hapaxes", xlab = "Decade",
     main = "Potential productivity \n (proportion of hapax legomena), COCA",
     lwd = 2, col = "blue", cex = 2, cex.lab = 2, cex.axis = 2, xaxt = "n")
axis(1, at = c(1990, 2000, 2010), cex.axis = 2)

# dev.off()
# restore the graphical parameters that were active before this figure
par(op)

Distributional semantics

To assess the semantics of the slot fillers in more detail, we use distributional semantics. More specifically, we draw on word2vec. Word2vec is originally the name of a software package comprising two main algorithms for representing words in terms of dense vectors, but the term has become virtually synonymous with the approach itself. We use the R package wordVectors, which builds on the original word2vec code. We use the skip-gram approach, which is generally considered to work better with a small amount of training data (see e.g. this tutorial) than the alternative continuous-bag-of-words (cbow) approach. The model was trained on the first of the 17 downloadable sentence shuffles of ENCOW using the following code:

# read data & export text files without annotation
# Note: the downloadable file containing the first sentence shuffle
# of ENCOW has been split in c. 100 parts, named xaa, xab etc. etc.,
# hence the pattern in the list.file command below. The algorithm
# produces a txt file containing only the words, without annotations
f <- list.files("/Volumes/INTENSO/Corpora/ENCOW/", pattern = "^x..", full.names = TRUE)

# seq_along() instead of 1:length(f), so that an empty file list
# results in zero iterations rather than iterating over c(1, 0)
for (i in seq_along(f)) {
  d <- vroom_lines(f[i])
  # blank out every line that starts with "<" and, on all other lines,
  # drop everything from the first tab onwards (keeps the first column)
  d <- gsub("^<.*|\t.*", "", d)
  # remove the lines that are now empty
  d <- d[d != ""]
  vroom_write_lines(d, paste0("/Volumes/INTENSO/Corpora/ENCOW/encow_word2vec_training/encow16b_words_for_training001", i, ".txt"))
  # progress indicator
  print(i)
}

# Next, the prep_word2vec command from the WordVectors package is used
# to prepare the training file, containing just the words in lowercase,
# without punctuation. The model is then trained on the basis of this
# file and exported.
# NOTE(review): the loop above writes into encow_word2vec_training/,
# whereas `origin` points to its subfolder words/ - confirm that the
# exported files are expected there.
prep_word2vec(origin = "/Volumes/INTENSO/Corpora/ENCOW/encow_word2vec_training/words", destination = "/Volumes/INTENSO/Corpora/ENCOW/encowa_w2v_words.txt", lowercase = TRUE, bundle_ngrams = 1)

# train model:
model <- train_word2vec("/Volumes/INTENSO/Corpora/ENCOW/encowa_w2v_words.txt", output_file = "/Volumes/INTENSO/Corpora/ENCOW/encow_vectors_word_based.bin", vectors = 100, threads = 4, window = 5, iter = 3, negative_samples = 5)

# export:
write_rds(model, "model.Rds")

We use this model to visualize the semantic proximity of the slot fillers. For dimensionality reduction, we use both Multidimensional Scaling (MDS) and t-distributed Stochastic Neighbor Embedding (t-SNE). (Only the latter is reported in the paper as it yields more convincing results; we initially worked with MDS and include the results here for the sake of completeness, and to allow for a comparison between the different results.)

# import the model
model <- readRDS("/Users/stefanhartmann/sciebo/Projekte/snowclones/word2vec/model.Rds")

# matrix of terms occurring in [mother of all X]'s open slot:
# pairwise cosine distances between the vectors of all slot fillers
cosine_dist_matrix <- cosineDist(model[[moa_lemmas$Lemma, average = FALSE]], model[[moa_lemmas$Lemma, average = FALSE]])

# multidimensional scaling: two coordinates (V1, V2) per lemma
cosine_dists <- cosine_dist_matrix %>% cmdscale() %>% as.data.frame() %>% rownames_to_column() %>% setNames(c("lemma", "V1", "V2"))

# alternative: t-SNE
# t-SNE is stochastic; fix the seed so that the embedding is reproducible
# NOTE(review): cosine_dist_matrix is passed as a plain matrix and
# is_distance is left at its default - confirm whether
# is_distance = TRUE was intended here.
set.seed(1994)
cosine_rtsne <- cosine_dist_matrix %>% Rtsne::Rtsne()

# we use Partitioning Around Medoids (PAM) to
# identify a small number of clusters (here: 3).
# As the results are not really meaningful, we have
# refrained from including it in the final analysis though.

# get PAM clusters
# for(i in 2:10) {
#   print(pam(cosine_dists[, c("V1", "V2")], i)$silinfo$avg.width)
# }
#
# Cluster on the two MDS coordinates only. Passing the whole data frame
# would also feed the character column `lemma` into pam(), where it is
# coerced to integer codes and, having by far the largest range,
# dominates the clustering.
pams <- pam(cosine_dists[, c("V1", "V2")], 3)$clustering

# add frequency information
moa_freqs <- moa$lemma %>% table %>% as_tibble() %>% setNames(c("lemma", "n"))

# combine with MDS results
cosine_dists <- left_join(cosine_dists, moa_freqs, by = "lemma")
# add log frequency
cosine_dists$LogFreq <- log1p(cosine_dists$n)

# 3 clusters
cosine_dists$clusters <- pams


# add Rtsne: the two t-SNE coordinates as columns dim1 and dim2
cosine_dists <- cbind(cosine_dists, setNames(as.data.frame(cosine_rtsne$Y), c("dim1", "dim2")))


# visualize

# append one dummy row (empty label, LogFreq = 0.3) that only serves
# to increase the font size of the remaining items
# (only for print version)
cosine_dists <- rbind(
  cosine_dists,
  data.frame(
    lemma = "", V1 = 0, V2 = 0, n = 0,
    LogFreq = 0.3, clusters = 1, dim1 = 0, dim2 = 0
  )
)


# MDS solution: label size reflects log frequency, colour the PAM cluster
set.seed(1994)
ggplot(
  cosine_dists,
  aes(x = V1, y = V2, label = lemma, size = LogFreq, colour = factor(clusters))
) +
  geom_text_repel(max.overlaps = 15) +
  scale_color_viridis_d() +
  theme_bw() +
  guides(colour = "none", size = "none") +
  # print version (disabled): size 18 for axis.text, axis.title,
  # strip.text and text, plus a bold size-18 legend.title, i.e.
  # theme(axis.text = element_text(size = 18)) etc.
  labs(x = "dim1", y = "dim2")
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# ggsave("distsem_moa_word2vec.png", width = 7, height = 6, dpi=500)


# use RTsne instead: same plot based on the t-SNE coordinates,
# without cluster colouring
set.seed(1994)
ggplot(
  cosine_dists,
  aes(x = dim1, y = dim2, label = lemma, size = LogFreq)
) +
  geom_text_repel(max.overlaps = 15) +
  scale_color_viridis_d() +
  theme_bw() +
  guides(size = "none") +
  # print version (disabled): size 18 for axis.text, axis.title,
  # strip.text and text, plus a bold size-18 legend.title, i.e.
  # theme(axis.text = element_text(size = 18)) etc.
  labs(x = "dim1", y = "dim2")

# ggsave("distsem_moa_word2vec_tsne.png", width = 7, height = 6, dpi=500)

Semantic distance between MOTHER and all X elements

Finally, we compute the semantic distance between mother and all X elements, following the suggestion of a reviewer (thank you!):

# pairwise cosine distances between "mother" and all slot fillers
mother <- cosineDist(model[[c("mother", moa_lemmas$Lemma), average = FALSE]], model[[c("mother", moa_lemmas$Lemma), average = FALSE]])

mother <- as.data.frame(mother)

# histogram of the column holding the distances to "mother"
# (`units` spelled out instead of relying on partial matching of `un`)
# NOTE(review): this column also contains the distance of "mother" to
# itself - confirm whether that value should be part of the histogram.
png("mothervsrest.png", width = 6.5, height = 5, units = "in", res = 300)
mother[, which(colnames(mother) == "mother")] %>% hist(main = expression(paste("Cosine distance between ", italic("mother "), "and all X items")))
dev.off()
## quartz_off_screen 
##                 2

Alternative collostructional analysis

Following Gries (2019), we separate association from frequency by using the log odds ratio as association measure and plotting frequency independently:

# Plot the result of collex(..., am = "odds"): log1p of the observed
# frequency (OBS) on the x axis against log1p of the association score
# (COLL.STR.ODDS) on the y axis, with the collexemes (COLLEX) as labels,
# shaded by frequency.
#   collex_tbl  output of collex() with am = "odds"
#   title       plot title (name of the corpus)
# The axis labels used to be swapped relative to the mapped aesthetics;
# they now describe what is actually plotted on each axis.
# NOTE(review): log1p() yields NaN for COLL.STR.ODDS values below -1
# (see the warnings below), so those items are dropped from the plot -
# confirm that this is intended.
plot_collex_odds <- function(collex_tbl, title) {
  ggplot(
    collex_tbl,
    aes(x = log1p(OBS), y = log1p(COLL.STR.ODDS), label = COLLEX, col = log1p(OBS))
  ) +
    geom_text() +
    theme_bw() +
    xlab("Log Frequency") +
    ylab("Log odds ratio") +
    scale_color_continuous(low = rgb(0, .7, 1, .4), high = "black") +
    guides(col = "none") +
    ggtitle(title) +
    theme(plot.title = element_text(face = "bold", hjust = 0.5))
}

(p1 <- collex(collex_input, corpsize = sum(coca$Freq), am = "odds") %>%
  plot_collex_odds("COCA"))

(p2 <- collex(as.data.frame(d_tbl), corpsize = 1805183579, am = "odds") %>%
  plot_collex_odds("ENCOW"))
## Warning in log1p(COLL.STR.ODDS): NaNs produced

## Warning in log1p(COLL.STR.ODDS): NaNs produced
## Warning: Removed 2 rows containing missing values (geom_text).

# both panels side by side
p1 | p2
## Warning in log1p(COLL.STR.ODDS): NaNs produced
## Warning in log1p(COLL.STR.ODDS): NaNs produced
## Warning: Removed 2 rows containing missing values (geom_text).

# saves the plot displayed last, i.e. the combined figure
ggsave("collex_moa_coca_encow.png", height = 7, width = 13)
## Warning in log1p(COLL.STR.ODDS): NaNs produced
## Warning in log1p(COLL.STR.ODDS): NaNs produced
## Warning: Removed 2 rows containing missing values (geom_text).